* [PATCH 1/8] lei: support -c <name>=<value> to overrides
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
@ 2021-03-22 7:53 ` Eric Wong
2021-03-22 7:53 ` [PATCH 2/8] net_reader: escape nasty chars from Net::NNTP->message Eric Wong
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:53 UTC (permalink / raw)
To: meta
It's a bit nasty, but seems to mostly work for debugging
IMAP and NNTP commands.
---
lib/PublicInbox/LEI.pm | 109 ++++++++++++++++++++++-----------
lib/PublicInbox/LeiExternal.pm | 2 +-
t/lei.t | 9 +++
3 files changed, 83 insertions(+), 37 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index b6d21af6..9e3bb9b7 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -119,6 +119,8 @@ sub index_opt {
batch_size|batch-size=s skip-docdata)
}
+my @c_opt = qw(c=s@ C=s@ quiet|q);
+
# we generate shell completion + help using %CMD and %OPTDESC,
# see lei__complete() and PublicInbox::LeiHelp
# command => [ positional_args, 1-line description, Getopt::Long option spec ]
@@ -129,82 +131,80 @@ our %CMD = ( # sorted in order of importance/use:
sort|s=s reverse|r offset=i remote! local! external! pretty
include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g augment|a
import-remote! import-before! lock=s@ rsyncable
- alert=s@ mua=s no-torsocks torsocks=s verbose|v+ quiet|q C=s@),
+ alert=s@ mua=s no-torsocks torsocks=s verbose|v+), @c_opt,
PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
- qw(type=s solve! format|f=s dedupe|d=s threads|t remote local! C=s@),
- pass_through('git show') ],
+ qw(type=s solve! format|f=s dedupe|d=s threads|t remote local!
+ verbose|v+), @c_opt, pass_through('git show') ],
'add-external' => [ 'LOCATION',
'add/set priority of a publicinbox|extindex for extra matches',
- qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i),
- qw(quiet|q verbose|v+ C=s@),
- index_opt(), PublicInbox::LeiQuery::curl_opt() ],
+ qw(boost=i mirror=s no-torsocks torsocks=s inbox-version=i
+ verbose|v+), @c_opt, index_opt(),
+ PublicInbox::LeiQuery::curl_opt() ],
'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations',
- qw(format|f=s z|0 globoff|g invert-match|v local remote C=s@) ],
+ qw(format|f=s z|0 globoff|g invert-match|v local remote), @c_opt ],
'forget-external' => [ 'LOCATION...|--prune',
'exclude further results from a publicinbox|extindex',
- qw(prune quiet|q C=s@) ],
+ qw(prune), @c_opt ],
'ls-query' => [ '[FILTER...]', 'list saved search queries',
- qw(name-only format|f=s C=s@) ],
-'rm-query' => [ 'QUERY_NAME', 'remove a saved search', qw(C=s@) ],
-'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search', qw(C=s@) ],
+ qw(name-only format|f=s), @c_opt ],
+'rm-query' => [ 'QUERY_NAME', 'remove a saved search', @c_opt ],
+'mv-query' => [ qw(OLD_NAME NEW_NAME), 'rename a saved search', @c_opt ],
'plonk' => [ '--threads|--from=IDENT',
'exclude mail matching From: or threads from non-Message-ID searches',
- qw(stdin| threads|t from|f=s mid=s oid=s C=s@) ],
+ qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
'mark' => [ 'MESSAGE_FLAGS...',
'set/unset keywords on message(s) from stdin',
- qw(stdin| oid=s exact by-mid|mid:s C=s@) ],
+ qw(stdin| oid=s exact by-mid|mid:s), @c_opt ],
'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]',
"exclude message(s) on stdin from `q' search results",
- qw(stdin| oid=s exact by-mid|mid:s quiet|q C=s@) ],
+ qw(stdin| oid=s exact by-mid|mid:s), @c_opt ],
'purge-mailsource' => [ 'LOCATION|--all',
'remove imported messages from IMAP, Maildirs, and MH',
- qw(exact! all jobs:i indexed C=s@) ],
+ qw(exact! all jobs:i indexed), @c_opt ],
# code repos are used for `show' to solve blobs from patch mails
'add-coderepo' => [ 'DIRNAME', 'add or set priority of a git code repo',
- qw(boost=i C=s@) ],
+ qw(boost=i), @c_opt ],
'ls-coderepo' => [ '[FILTER_TERMS...]',
- 'list known code repos', qw(format|f=s z C=s@) ],
+ 'list known code repos', qw(format|f=s z), @c_opt ],
'forget-coderepo' => [ 'DIRNAME',
'stop using repo to solve blobs from patches',
- qw(prune C=s@) ],
+ qw(prune), @c_opt ],
'add-watch' => [ 'LOCATION', 'watch for new messages and flag changes',
qw(import! kw|keywords|flags! interval=s recursive|r
- exclude=s include=s C=s@) ],
+ exclude=s include=s), @c_opt ],
'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status',
- qw(format|f=s z C=s@) ],
-'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote C=s@) ],
-'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote C=s@) ],
+ qw(format|f=s z), @c_opt ],
+'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ],
+'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ],
'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
- qw(prune C=s@) ],
+ qw(prune), @c_opt ],
'import' => [ 'LOCATION...|--stdin',
'one-time import/update from URL or filesystem',
qw(stdin| offset=i recursive|r exclude=s include|I=s
- lock=s@ in-format|F=s kw|keywords|flags! C=s@),
- ],
+ lock=s@ in-format|F=s kw|keywords|flags! verbose|v+), @c_opt ],
'convert' => [ 'LOCATION...|--stdin',
'one-time conversion from URL or filesystem to another format',
- qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s quiet|q
- lock=s@ kw|keywords|flags! C=s@),
- ],
+ qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s
+ lock=s@ kw|keywords|flags!), @c_opt ],
'p2q' => [ 'FILE|COMMIT_OID|--stdin',
"use a patch to generate a query for `lei q --stdin'",
- qw(stdin| want|w=s@ uri debug) ],
+ qw(stdin| want|w=s@ uri debug), @c_opt ],
'config' => [ '[...]', sub {
'git-config(1) wrapper for '._config_path($_[0]);
}, qw(config-file|system|global|file|f=s), # for conflict detection
- qw(C=s@), pass_through('git config') ],
+ qw(c=s@ C=s@), pass_through('git config') ],
'init' => [ '[DIRNAME]', sub {
"initialize storage, default: ".store_path($_[0]);
- }, qw(quiet|q C=s@) ],
+ }, @c_opt ],
'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon',
# "-C DIR" conflicts with -CHLD, here, and chdir makes no sense, here
opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ],
@@ -216,7 +216,7 @@ our %CMD = ( # sorted in order of importance/use:
'reorder-local-store-and-break-history' => [ '[REFNAME]',
'rewrite git history in an attempt to improve compression',
- qw(gc! C=s@) ],
+ qw(gc!), @c_opt ],
# internal commands are prefixed with '_'
'_complete' => [ '[...]', 'internal shell completion helper',
@@ -235,6 +235,7 @@ my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
# we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
my %OPTDESC = (
'help|h' => 'show this built-in help',
+'c=s@' => [ 'NAME=VALUE', 'set config option' ],
'C=s@' => [ 'DIR', 'chdir to specify to directory' ],
'quiet|q' => 'be quiet',
'lock=s@' => [ 'METHOD|dotlock|fcntl|flock|none',
@@ -472,7 +473,8 @@ sub lei_atfork_child {
unless ($self->{oneshot}) {
close($_) for @io;
}
- } else {
+ } else { # worker, Net::NNTP (Net::Cmd) uses STDERR directly
+ open STDERR, '+>&='.fileno($self->{2}) or warn "open $!";
delete $self->{0};
}
delete @$self{qw(cnv)};
@@ -586,14 +588,47 @@ sub optparse ($$$) {
$err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1;
}
+sub _tmp_cfg { # for lei -c <name>=<value> ...
+ my ($self) = @_;
+ my $cfg = _lei_cfg($self, 1);
+ require File::Temp;
+ my $ft = File::Temp->new(TEMPLATE => 'lei_cfg-XXXX', TMPDIR => 1);
+ my $tmp = { '-f' => $ft->filename, -tmp => $ft };
+ $ft->autoflush(1);
+ print $ft <<EOM or return fail($self, "$tmp->{-f}: $!");
+[include]
+ path = $cfg->{-f}
+EOM
+ $tmp = $self->{cfg} = bless { %$cfg, %$tmp }, ref($cfg);
+ for (@{$self->{opt}->{c}}) {
+ /\A([^=\.]+\.[^=]+)(?:=(.*))?\z/ or return fail($self, <<EOM);
+`-c $_' is not of the form -c <name>=<value>'
+EOM
+ my $name = $1;
+ my $value = $2 // 1;
+ _config($self, '--add', $name, $value);
+ if (defined(my $v = $tmp->{$name})) {
+ if (ref($v) eq 'ARRAY') {
+ push @$v, $value;
+ } else {
+ $tmp->{$name} = [ $v, $value ];
+ }
+ } else {
+ $tmp->{$name} = $value;
+ }
+ }
+}
+
sub dispatch {
my ($self, $cmd, @argv) = @_;
local $current_lei = $self; # for __WARN__
dump_and_clear_log("from previous run\n");
return _help($self, 'no command given') unless defined($cmd);
- while ($cmd eq '-C') { # do not support Getopt bundling for this
- my $d = shift(@argv) // return fail($self, '-C DIRECTORY');
- push @{$self->{opt}->{C}}, $d;
+ # do not support Getopt bundling for this
+ while ($cmd eq '-C' || $cmd eq '-c') {
+ my $v = shift(@argv) // return fail($self, $cmd eq '-C' ?
+ '-C DIRECTORY' : '-c <name>=<value>');
+ push @{$self->{opt}->{substr($cmd, 1, 1)}}, $v;
$cmd = shift(@argv) // return _help($self, 'no command given');
}
my $func = "lei_$cmd";
@@ -605,6 +640,7 @@ sub dispatch {
} : undef);
if ($cb) {
optparse($self, $cmd, \@argv) or return;
+ $self->{opt}->{c} and (_tmp_cfg($self) // return);
if (my $chdir = $self->{opt}->{C}) {
for my $d (@$chdir) {
next if $d eq ''; # same as git(1)
@@ -623,6 +659,7 @@ sub dispatch {
sub _lei_cfg ($;$) {
my ($self, $creat) = @_;
+ return $self->{cfg} if $self->{cfg};
my $f = _config_path($self);
my @st = stat($f);
my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index f4e24c2a..9a555831 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -149,7 +149,7 @@ sub lei_add_external {
my $mirror = $opt->{mirror} // do {
my @fail;
for my $sw ($self->index_opt, $self->curl_opt,
- qw(c no-torsocks torsocks inbox-version)) {
+ qw(no-torsocks torsocks inbox-version)) {
my ($f) = (split(/|/, $sw, 2))[0];
next unless defined $opt->{$f};
$f = length($f) == 1 ? "-$f" : "--$f";
diff --git a/t/lei.t b/t/lei.t
index 2bf4b862..0cf97866 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -93,6 +93,15 @@ my $test_config = sub {
'config set var with -f fails');
like($lei_err, qr/not supported/, 'not supported noted');
ok(!-f "$home/config/f", 'no file created');
+
+ lei_ok(qw(-c imap.debug config --bool imap.debug));
+ is($lei_out, "true\n", "-c sets w/o value");
+ lei_ok(qw(-c imap.debug=1 config --bool imap.debug));
+ is($lei_out, "true\n", "-c coerces value");
+ lei_ok(qw(-c imap.debug=tr00 config imap.debug));
+ is($lei_out, "tr00\n", "-c string value passed as-is");
+ lei_ok(qw(-c imap.debug=a -c imap.debug=b config --get-all imap.debug));
+ is($lei_out, "a\nb\n", '-c and --get-all work together');
};
my $test_completion = sub {
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/8] net_reader: escape nasty chars from Net::NNTP->message
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
2021-03-22 7:53 ` [PATCH 1/8] lei: support -c <name>=<value> to overrides Eric Wong
@ 2021-03-22 7:53 ` Eric Wong
2021-03-22 7:53 ` [PATCH 3/8] lei: share input code between convert and import Eric Wong
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:53 UTC (permalink / raw)
To: meta
Net::Cmd::message (used by Net::NNTP) does no escaping at all,
so "\r" was causing confusing/nonsensical error messages when
I tried to import from the wrong group.
---
lib/PublicInbox/NetReader.pm | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index d3094fc7..bc211029 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -7,11 +7,15 @@ use strict;
use v5.10.1;
use parent qw(Exporter PublicInbox::IPC);
use PublicInbox::Eml;
-
our %IMAPflags2kw = map {; "\\\u$_" => $_ } qw(seen answered flagged draft);
our @EXPORT = qw(uri_section imap_uri nntp_uri);
+sub ndump {
+ require Data::Dumper;
+ Data::Dumper->new(\@_)->Useqq(1)->Terse(1)->Dump;
+}
+
# returns the git config section name, e.g [imap "imaps://user@example.com"]
# without the mailbox, so we can share connections between different inboxes
sub uri_section ($) {
@@ -530,7 +534,7 @@ sub _nntp_fetch_all ($$$) {
my $sec = uri_section($uri);
my ($nr, $beg, $end) = $nn->group($group);
unless (defined($nr)) {
- chomp(my $msg = $nn->message);
+ my $msg = ndump($nn->message);
return "E: GROUP $group <$sec> $msg";
}
@@ -566,7 +570,7 @@ sub _nntp_fetch_all ($$$) {
}
my $raw = $nn->article($art);
unless (defined($raw)) {
- my $msg = $nn->message;
+ my $msg = ndump($nn->message);
if ($nn->code == 421) { # pseudo response from Net::Cmd
$err = "E: $msg";
last;
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/8] lei: share input code between convert and import
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
2021-03-22 7:53 ` [PATCH 1/8] lei: support -c <name>=<value> to overrides Eric Wong
2021-03-22 7:53 ` [PATCH 2/8] net_reader: escape nasty chars from Net::NNTP->message Eric Wong
@ 2021-03-22 7:53 ` Eric Wong
2021-03-22 7:53 ` [PATCH 4/8] lei: simplify workers_start and callers Eric Wong
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:53 UTC (permalink / raw)
To: meta
These commands accept mail the same way, and this forces
us to maintain consistent input format support between
commands.
We'll be using this for "lei mark", too.
---
MANIFEST | 1 +
lib/PublicInbox/LEI.pm | 17 -------
lib/PublicInbox/LeiConvert.pm | 60 +++----------------------
lib/PublicInbox/LeiImport.pm | 57 ++---------------------
lib/PublicInbox/LeiInput.pm | 85 +++++++++++++++++++++++++++++++++++
5 files changed, 94 insertions(+), 126 deletions(-)
create mode 100644 lib/PublicInbox/LeiInput.pm
diff --git a/MANIFEST b/MANIFEST
index b6b4a3ab..df8440ef 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -187,6 +187,7 @@ lib/PublicInbox/LeiDedupe.pm
lib/PublicInbox/LeiExternal.pm
lib/PublicInbox/LeiHelp.pm
lib/PublicInbox/LeiImport.pm
+lib/PublicInbox/LeiInput.pm
lib/PublicInbox/LeiMirror.pm
lib/PublicInbox/LeiOverview.pm
lib/PublicInbox/LeiP2q.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 9e3bb9b7..0bd52a46 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -419,23 +419,6 @@ sub fail ($$;$) {
undef;
}
-sub check_input_format ($;$) {
- my ($self, $files) = @_;
- my $opt_key = 'in-format';
- my $fmt = $self->{opt}->{$opt_key};
- if (!$fmt) {
- my $err = $files ? "regular file(s):\n@$files" : '--stdin';
- return fail($self, "--$opt_key unset for $err");
- }
- require PublicInbox::MboxLock if $files;
- require PublicInbox::MboxReader;
- return 1 if $fmt eq 'eml';
- # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
- PublicInbox::MboxReader->can($fmt) or
- return fail($self, "--$opt_key=$fmt unrecognized");
- 1;
-}
-
sub out ($;@) {
my $self = shift;
return if print { $self->{1} // return } @_; # likely
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8d3b221a..0aa13229 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -5,7 +5,7 @@
package PublicInbox::LeiConvert;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::IPC);
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::Eml;
use PublicInbox::LeiStore;
use PublicInbox::LeiOverview;
@@ -79,64 +79,14 @@ sub do_convert { # via wq_do
sub lei_convert { # the main "lei convert" method
my ($lei, @inputs) = @_;
- my $opt = $lei->{opt};
- $opt->{kw} //= 1;
+ $lei->{opt}->{kw} //= 1;
+ $lei->{opt}->{dedupe} //= 'none';
my $self = $lei->{cnv} = bless {}, __PACKAGE__;
- my $in_fmt = $opt->{'in-format'};
- my (@f, @d);
- $opt->{dedupe} //= 'none';
my $ovv = PublicInbox::LeiOverview->new($lei, 'out-format');
$lei->{l2m} or return
$lei->fail("output not specified or is not a mail destination");
- my $net = $lei->{net}; # NetWriter may be created by l2m
- $opt->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
- if ($opt->{stdin}) {
- @inputs and return $lei->fail("--stdin and @inputs do not mix");
- $lei->check_input_format(undef) or return;
- $self->{0} = $lei->{0};
- }
- # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
- for my $input (@inputs) {
- my $input_path = $input;
- if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
- require PublicInbox::NetReader;
- $net //= PublicInbox::NetReader->new;
- $net->add_url($input);
- } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
- my $ifmt = lc $1;
- if (($in_fmt // $ifmt) ne $ifmt) {
- return $lei->fail(<<"");
---in-format=$in_fmt and `$ifmt:' conflict
-
- }
- if (-f $input_path) {
- require PublicInbox::MboxLock;
- require PublicInbox::MboxReader;
- PublicInbox::MboxReader->can($ifmt) or return
- $lei->fail("$ifmt not supported");
- } elsif (-d _) {
- require PublicInbox::MdirReader;
- $ifmt eq 'maildir' or return
- $lei->fail("$ifmt not supported");
- } else {
- return $lei->fail("Unable to handle $input");
- }
- } elsif (-f $input) { push @f, $input }
- elsif (-d _) { push @d, $input }
- else { return $lei->fail("Unable to handle $input") }
- }
- if (@f) { $lei->check_input_format(\@f) or return }
- if (@d) { # TODO: check for MH vs Maildir, here
- require PublicInbox::MdirReader;
- }
- $self->{inputs} = \@inputs;
- if ($net) {
- if (my $err = $net->errors) {
- return $lei->fail($err);
- }
- $net->{quiet} = $opt->{quiet};
- $lei->{net} //= $net;
- }
+ $lei->{opt}->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
+ $self->prepare_inputs($lei, \@inputs) or return;
my $op = $lei->workers_start($self, 'lei_convert', 1, {
'' => [ $lei->can('dclose'), $lei ]
});
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 0e2a96e8..e769fba8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -5,7 +5,7 @@
package PublicInbox::LeiImport;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::IPC);
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::Eml;
use PublicInbox::PktOp qw(pkt_do);
@@ -67,60 +67,9 @@ sub lei_import { # the main "lei import" method
my ($lei, @inputs) = @_;
my $sto = $lei->_lei_store(1);
$sto->write_prepare($lei);
- my ($net, @f, @d);
$lei->{opt}->{kw} //= 1;
- my $self = $lei->{imp} = bless { inputs => \@inputs }, __PACKAGE__;
- if ($lei->{opt}->{stdin}) {
- @inputs and return $lei->fail("--stdin and @inputs do not mix");
- $lei->check_input_format or return;
- $self->{0} = $lei->{0};
- }
-
- my $fmt = $lei->{opt}->{'in-format'};
- # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
- for my $input (@inputs) {
- my $input_path = $input;
- if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
- require PublicInbox::NetReader;
- $net //= PublicInbox::NetReader->new;
- $net->add_url($input);
- } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
- my $ifmt = lc $1;
- if (($fmt // $ifmt) ne $ifmt) {
- return $lei->fail(<<"");
---in-format=$fmt and `$ifmt:' conflict
-
- }
- if (-f $input_path) {
- require PublicInbox::MboxLock;
- require PublicInbox::MboxReader;
- PublicInbox::MboxReader->can($ifmt) or return
- $lei->fail("$ifmt not supported");
- } elsif (-d _) {
- require PublicInbox::MdirReader;
- $ifmt eq 'maildir' or return
- $lei->fail("$ifmt not supported");
- } else {
- return $lei->fail("Unable to handle $input");
- }
- } elsif (-f $input) { push @f, $input
- } elsif (-d _) { push @d, $input
- } else { return $lei->fail("Unable to handle $input") }
- }
- if (@f) { $lei->check_input_format(\@f) or return }
- if (@d) { # TODO: check for MH vs Maildir, here
- require PublicInbox::MdirReader;
- }
- $self->{inputs} = \@inputs;
- if ($net) {
- if (my $err = $net->errors) {
- return $lei->fail($err);
- }
- $net->{quiet} = $lei->{opt}->{quiet};
- $lei->{net} = $net;
- require PublicInbox::LeiAuth;
- $lei->{auth} = PublicInbox::LeiAuth->new;
- }
+ my $self = $lei->{imp} = bless {}, __PACKAGE__;
+ $self->prepare_inputs($lei, \@inputs) or return;
import_start($lei);
}
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
new file mode 100644
index 00000000..89585a52
--- /dev/null
+++ b/lib/PublicInbox/LeiInput.pm
@@ -0,0 +1,85 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# parent class for LeiImport, LeiConvert
+package PublicInbox::LeiInput;
+use strict;
+use v5.10.1;
+
+sub check_input_format ($;$) {
+ my ($lei, $files) = @_;
+ my $opt_key = 'in-format';
+ my $fmt = $lei->{opt}->{$opt_key};
+ if (!$fmt) {
+ my $err = $files ? "regular file(s):\n@$files" : '--stdin';
+ return $lei->fail("--$opt_key unset for $err");
+ }
+ require PublicInbox::MboxLock if $files;
+ require PublicInbox::MboxReader;
+ return 1 if $fmt eq 'eml';
+ # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
+ PublicInbox::MboxReader->can($fmt) or
+ return $lei->fail("--$opt_key=$fmt unrecognized");
+ 1;
+}
+
+
+sub prepare_inputs {
+ my ($self, $lei, $inputs) = @_;
+ my $in_fmt = $lei->{opt}->{'in-format'};
+ if ($lei->{opt}->{stdin}) {
+ @$inputs and return
+ $lei->fail("--stdin and @$inputs do not mix");
+ check_input_format($lei) or return;
+ $self->{0} = $lei->{0};
+ }
+ my $net = $lei->{net}; # NetWriter may be created by l2m
+ my $fmt = $lei->{opt}->{'in-format'};
+ my (@f, @d);
+ # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
+ for my $input (@$inputs) {
+ my $input_path = $input;
+ if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
+ require PublicInbox::NetReader;
+ $net //= PublicInbox::NetReader->new;
+ $net->add_url($input);
+ } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
+ my $ifmt = lc $1;
+ if (($in_fmt // $ifmt) ne $ifmt) {
+ return $lei->fail(<<"");
+--in-format=$in_fmt and `$ifmt:' conflict
+
+ }
+ if (-f $input_path) {
+ require PublicInbox::MboxLock;
+ require PublicInbox::MboxReader;
+ PublicInbox::MboxReader->can($ifmt) or return
+ $lei->fail("$ifmt not supported");
+ } elsif (-d _) {
+ require PublicInbox::MdirReader;
+ $ifmt eq 'maildir' or return
+ $lei->fail("$ifmt not supported");
+ } else {
+ return $lei->fail("Unable to handle $input");
+ }
+ } elsif (-f $input) { push @f, $input }
+ elsif (-d _) { push @d, $input }
+ else { return $lei->fail("Unable to handle $input") }
+ }
+ if (@f) { check_input_format($lei, \@f) or return }
+ if (@d) { # TODO: check for MH vs Maildir, here
+ require PublicInbox::MdirReader;
+ }
+ if ($net) {
+ if (my $err = $net->errors) {
+ return $lei->fail($err);
+ }
+ $net->{quiet} = $lei->{opt}->{quiet};
+ require PublicInbox::LeiAuth;
+ $lei->{auth} //= PublicInbox::LeiAuth->new;
+ $lei->{net} //= $net;
+ }
+ $self->{inputs} = $inputs;
+}
+
+1;
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/8] lei: simplify workers_start and callers
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
` (2 preceding siblings ...)
2021-03-22 7:53 ` [PATCH 3/8] lei: share input code between convert and import Eric Wong
@ 2021-03-22 7:53 ` Eric Wong
2021-03-22 7:53 ` [PATCH 5/8] mbox_reader: add ->reads method to avoid nonsensical formats Eric Wong
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:53 UTC (permalink / raw)
To: meta
Since workers_start is in the common PublicInbox::LEI
package, we can just use \&METHOD_NAME instead of relying
on UNIVERSAL->can to avoid a method dispatch.
Most of our worker code can just use lei->dclose, so default
to doing that unless it's been overridden.
---
lib/PublicInbox/LEI.pm | 11 ++++++-----
lib/PublicInbox/LeiConvert.pm | 4 +---
lib/PublicInbox/LeiP2q.pm | 4 +---
3 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 0bd52a46..1e720b89 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -481,12 +481,13 @@ sub lei_atfork_child {
sub workers_start {
my ($lei, $wq, $ident, $jobs, $ops) = @_;
$ops = {
- '!' => [ $lei->can('fail_handler'), $lei ],
- '|' => [ $lei->can('sigpipe_handler'), $lei ],
- 'x_it' => [ $lei->can('x_it'), $lei ],
- 'child_error' => [ $lei->can('child_error'), $lei ],
- %$ops
+ '!' => [ \&fail_handler, $lei ],
+ '|' => [ \&sigpipe_handler, $lei ],
+ 'x_it' => [ \&x_it, $lei ],
+ 'child_error' => [ \&child_error, $lei ],
+ ($ops ? %$ops : ()),
};
+ $ops->{''} //= [ \&dclose, $lei ];
require PublicInbox::PktOp;
($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops);
$wq->wq_workers_start($ident, $jobs, $lei->oldset, { lei => $lei });
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 0aa13229..8685c194 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -87,9 +87,7 @@ sub lei_convert { # the main "lei convert" method
$lei->fail("output not specified or is not a mail destination");
$lei->{opt}->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
$self->prepare_inputs($lei, \@inputs) or return;
- my $op = $lei->workers_start($self, 'lei_convert', 1, {
- '' => [ $lei->can('dclose'), $lei ]
- });
+ my $op = $lei->workers_start($self, 'lei_convert', 1);
$self->wq_io_do('do_convert', []);
$self->wq_close(1);
while ($op && $op->{sock}) { $op->event_step }
diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm
index 302d7864..4abe1345 100644
--- a/lib/PublicInbox/LeiP2q.pm
+++ b/lib/PublicInbox/LeiP2q.pm
@@ -182,9 +182,7 @@ sub lei_p2q { # the "lei patch-to-query" entry point
} else {
$self->{input} = $input;
}
- my $op = $lei->workers_start($self, 'lei patch2query', 1, {
- '' => [ $lei->{p2q_done} // $lei->can('dclose'), $lei ]
- });
+ my $op = $lei->workers_start($self, 'lei patch2query', 1);
$self->wq_io_do('do_p2q', []);
$self->wq_close(1);
while ($op && $op->{sock}) { $op->event_step }
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 5/8] mbox_reader: add ->reads method to avoid nonsensical formats
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
` (3 preceding siblings ...)
2021-03-22 7:53 ` [PATCH 4/8] lei: simplify workers_start and callers Eric Wong
@ 2021-03-22 7:53 ` Eric Wong
2021-03-22 7:54 ` [PATCH 6/8] lei_input: common filehandle reader for eml + mbox Eric Wong
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:53 UTC (permalink / raw)
To: meta
Relying on UNIVERSAL::can may cause internal helper methods
to be used, which can lead to failures or nonsensical results.
---
lib/PublicInbox/InboxWritable.pm | 2 +-
lib/PublicInbox/LeiImport.pm | 2 +-
lib/PublicInbox/LeiInput.pm | 4 ++--
lib/PublicInbox/MboxReader.pm | 5 +++++
4 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index c2baeba6..eeebc485 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -172,7 +172,7 @@ sub _mbox_eml_cb { # MboxReader->mbox* callback
sub import_mbox {
my ($self, $fh, $variant) = @_;
require PublicInbox::MboxReader;
- my $cb = PublicInbox::MboxReader->can($variant) or
+ my $cb = PublicInbox::MboxReader->reads($variant) or
die "$variant not supported\n";
my $im = $self->importer(1);
$cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter);
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index e769fba8..e587ada8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -96,7 +96,7 @@ error reading $input: $!
my $eml = PublicInbox::Eml->new(\$buf);
_import_eml($eml, $lei, $kw);
} else { # some mbox (->can already checked in call);
- my $cb = PublicInbox::MboxReader->can($ifmt) //
+ my $cb = PublicInbox::MboxReader->reads($ifmt) //
die "BUG: bad fmt=$ifmt";
$cb->(undef, $fh, \&_import_eml, $lei, $kw);
}
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 89585a52..776b3151 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -18,7 +18,7 @@ sub check_input_format ($;$) {
require PublicInbox::MboxReader;
return 1 if $fmt eq 'eml';
# XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
- PublicInbox::MboxReader->can($fmt) or
+ PublicInbox::MboxReader->reads($fmt) or
return $lei->fail("--$opt_key=$fmt unrecognized");
1;
}
@@ -53,7 +53,7 @@ sub prepare_inputs {
if (-f $input_path) {
require PublicInbox::MboxLock;
require PublicInbox::MboxReader;
- PublicInbox::MboxReader->can($ifmt) or return
+ PublicInbox::MboxReader->reads($ifmt) or return
$lei->fail("$ifmt not supported");
} elsif (-d _) {
require PublicInbox::MdirReader;
diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm
index f93c2ec6..6302d1fa 100644
--- a/lib/PublicInbox/MboxReader.pm
+++ b/lib/PublicInbox/MboxReader.pm
@@ -133,4 +133,9 @@ sub mboxcl2 {
sub new { bless \(my $x), __PACKAGE__ }
+sub reads {
+ my $ifmt = $_[-1];
+ $ifmt =~ /\Ambox(?:rd|cl|cl2|o)\z/ ? __PACKAGE__->can($ifmt) : undef
+}
+
1;
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 6/8] lei_input: common filehandle reader for eml + mbox
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
` (4 preceding siblings ...)
2021-03-22 7:53 ` [PATCH 5/8] mbox_reader: add ->reads method to avoid nonsensical formats Eric Wong
@ 2021-03-22 7:54 ` Eric Wong
2021-03-22 7:54 ` [PATCH 7/8] lei_input: drop "From " line on single "eml" (message/rfc822) Eric Wong
2021-03-22 7:54 ` [PATCH 8/8] lei import: ignore Status headers in "eml" messages Eric Wong
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:54 UTC (permalink / raw)
To: meta
This improve code regularity, and will let us deal with
the "RFC822" messages with "From " line that mutt pipes
to.
---
lib/PublicInbox/LeiConvert.pm | 30 +++++++--------------
lib/PublicInbox/LeiImport.pm | 50 ++++++++++++-----------------------
lib/PublicInbox/LeiInput.pm | 17 ++++++++++++
3 files changed, 44 insertions(+), 53 deletions(-)
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8685c194..51a233bd 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -10,13 +10,18 @@ use PublicInbox::Eml;
use PublicInbox::LeiStore;
use PublicInbox::LeiOverview;
-sub mbox_cb {
+sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh
my ($eml, $self) = @_;
my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
$eml->header_set($_) for qw(Status X-Status);
$self->{wcb}->(undef, { kw => $kw }, $eml);
}
+sub eml_cb { # used by PublicInbox::LeiInput::input_fh
+ my ($self, $eml) = @_;
+ $self->{wcb}->(undef, { kw => [] }, $eml);
+}
+
sub net_cb { # callback for ->imap_each, ->nntp_each
my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored
$self->{wcb}->(undef, { kw => $kw }, $eml);
@@ -27,30 +32,15 @@ sub mdir_cb {
$self->{wcb}->(undef, { kw => $kw }, $eml);
}
-sub convert_fh ($$$$) {
- my ($self, $ifmt, $fh, $name) = @_;
- if ($ifmt eq 'eml') {
- my $buf = do { local $/; <$fh> } //
- return $self->{lei}->child_error(1 << 8, <<"");
-error reading $name: $!
-
- my $eml = PublicInbox::Eml->new(\$buf);
- $self->{wcb}->(undef, { kw => [] }, $eml);
- } else {
- PublicInbox::MboxReader->$ifmt($fh, \&mbox_cb, $self);
- }
-}
-
sub do_convert { # via wq_do
my ($self) = @_;
my $lei = $self->{lei};
- my $in_fmt = $lei->{opt}->{'in-format'};
- my $mics;
+ my $ifmt = $lei->{opt}->{'in-format'};
if (my $stdin = delete $self->{0}) {
- convert_fh($self, $in_fmt, $stdin, '<stdin>');
+ $self->input_fh($ifmt, $stdin, '<stdin>');
}
for my $input (@{$self->{inputs}}) {
- my $ifmt = lc($in_fmt // '');
+ my $ifmt = lc($ifmt // '');
if ($input =~ m!\Aimaps?://!) {
$lei->{net}->imap_each($input, \&net_cb, $self);
next;
@@ -65,7 +55,7 @@ sub do_convert { # via wq_do
($ifmt eq 'eml' ? ['none'] :
PublicInbox::MboxLock->defaults);
my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
- convert_fh($self, $ifmt, $mbl->{fh}, $input);
+ $self->input_fh($ifmt, $mbl->{fh}, $input);
} elsif (-d _) {
PublicInbox::MdirReader::maildir_each_eml($input,
\&mdir_cb, $self);
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index e587ada8..767cae60 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -9,15 +9,20 @@ use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::Eml;
use PublicInbox::PktOp qw(pkt_do);
-sub _import_eml { # MboxReader callback
- my ($eml, $lei, $mbox_keywords) = @_;
+sub eml_cb { # used by PublicInbox::LeiInput::input_fh
+ my ($self, $eml) = @_;
my $vmd;
- if ($mbox_keywords) {
- my $kw = $mbox_keywords->($eml);
+ if ($self->{-import_kw}) { # FIXME
+ my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
$vmd = { kw => $kw } if scalar(@$kw);
}
- my $xoids = $lei->{ale}->xoids_for($eml);
- $lei->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
+ my $xoids = $self->{lei}->{ale}->xoids_for($eml);
+ $self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
+}
+
+sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh
+ my ($eml, $self) = @_;
+ eml_cb($self, $eml);
}
sub import_done_wait { # dwaitpid callback
@@ -46,7 +51,7 @@ sub net_merge_complete { # callback used by LeiAuth
sub import_start {
my ($lei) = @_;
my $self = $lei->{imp};
- $lei->ale;
+ $lei->ale; # initialize for workers to read
my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
if (my $net = $lei->{net}) {
# $j = $net->net_concurrency($j); TODO
@@ -67,8 +72,8 @@ sub lei_import { # the main "lei import" method
my ($lei, @inputs) = @_;
my $sto = $lei->_lei_store(1);
$sto->write_prepare($lei);
- $lei->{opt}->{kw} //= 1;
my $self = $lei->{imp} = bless {}, __PACKAGE__;
+ $self->{-import_kw} = $lei->{opt}->{kw} // 1;
$self->prepare_inputs($lei, \@inputs) or return;
import_start($lei);
}
@@ -83,27 +88,6 @@ sub ipc_atfork_child {
undef;
}
-sub _import_fh {
- my ($lei, $fh, $input, $ifmt) = @_;
- my $kw = $lei->{opt}->{kw} ?
- PublicInbox::MboxReader->can('mbox_keywords') : undef;
- eval {
- if ($ifmt eq 'eml') {
- my $buf = do { local $/; <$fh> } //
- return $lei->child_error(1 << 8, <<"");
-error reading $input: $!
-
- my $eml = PublicInbox::Eml->new(\$buf);
- _import_eml($eml, $lei, $kw);
- } else { # some mbox (->can already checked in call);
- my $cb = PublicInbox::MboxReader->reads($ifmt) //
- die "BUG: bad fmt=$ifmt";
- $cb->(undef, $fh, \&_import_eml, $lei, $kw);
- }
- };
- $lei->child_error(1 << 8, "$input: $@") if $@;
-}
-
sub _import_maildir { # maildir_each_eml cb
my ($f, $kw, $eml, $sto, $set_kw) = @_;
$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ());
@@ -121,7 +105,7 @@ sub import_path_url {
# TODO auto-detect?
if ($input =~ m!\Aimaps?://!i) {
$lei->{net}->imap_each($input, \&_import_net, $lei->{sto},
- $lei->{opt}->{kw});
+ $self->{-import_kw});
return;
} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
$lei->{net}->nntp_each($input, \&_import_net, $lei->{sto}, 0);
@@ -133,14 +117,14 @@ sub import_path_url {
my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] :
PublicInbox::MboxLock->defaults);
my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
- _import_fh($lei, $mbl->{fh}, $input, $ifmt);
+ $self->input_fh($ifmt, $mbl->{fh}, $input);
} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
$input appears to a be a maildir, not $ifmt
EOM
PublicInbox::MdirReader::maildir_each_eml($input,
\&_import_maildir,
- $lei->{sto}, $lei->{opt}->{kw});
+ $lei->{sto}, $self->{-import_kw});
} else {
$lei->fail("$input unsupported (TODO)");
}
@@ -150,7 +134,7 @@ sub import_stdin {
my ($self) = @_;
my $lei = $self->{lei};
my $in = delete $self->{0};
- _import_fh($lei, $in, '<stdin>', $lei->{opt}->{'in-format'});
+ $self->input_fh($lei->{opt}->{'in-format'}, $in, '<stdin>');
}
no warnings 'once'; # the following works even when LeiAuth is lazy-loaded
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 776b3151..c62b0893 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -23,6 +23,23 @@ sub check_input_format ($;$) {
1;
}
+# import a single file handle of $name
+# Subclass must define ->eml_cb and ->mbox_cb
+sub input_fh {
+ my ($self, $ifmt, $fh, $name, @args) = @_;
+ if ($ifmt eq 'eml') {
+ my $buf = do { local $/; <$fh> } //
+ return $self->{lei}->child_error(1 << 8, <<"");
+error reading $name: $!
+
+ $self->eml_cb(PublicInbox::Eml->new(\$buf), @args);
+ } else {
+ # prepare_inputs already validated $ifmt
+ my $cb = PublicInbox::MboxReader->reads($ifmt) //
+ die "BUG: bad fmt=$ifmt";
+ $cb->(undef, $fh, $self->can('mbox_cb'), $self, @args);
+ }
+}
sub prepare_inputs {
my ($self, $lei, $inputs) = @_;
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 7/8] lei_input: drop "From " line on single "eml" (message/rfc822)
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
` (5 preceding siblings ...)
2021-03-22 7:54 ` [PATCH 6/8] lei_input: common filehandle reader for eml + mbox Eric Wong
@ 2021-03-22 7:54 ` Eric Wong
2021-03-22 7:54 ` [PATCH 8/8] lei import: ignore Status headers in "eml" messages Eric Wong
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:54 UTC (permalink / raw)
To: meta
This matches the long-standing behavior of public-inbox-mda,
public-inbox-learn and our other tools. It is useful because
mutt, "git format-patch", and likely other tools will
pipe a single message with a "From " header line, but with
no further "From " escaping or Content-Length: header.
---
lib/PublicInbox/LeiInput.pm | 4 ++++
t/lei-import.t | 10 ++++++++++
2 files changed, 14 insertions(+)
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index c62b0893..859fdb11 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -32,6 +32,10 @@ sub input_fh {
return $self->{lei}->child_error(1 << 8, <<"");
error reading $name: $!
+ # mutt pipes single RFC822 messages with a "From " line,
+ # but no Content-Length or "From " escaping.
+ # "git format-patch" also generates such files by default.
+ $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
$self->eml_cb(PublicInbox::Eml->new(\$buf), @args);
} else {
# prepare_inputs already validated $ifmt
diff --git a/t/lei-import.t b/t/lei-import.t
index e0b517f4..eef1e4e2 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -29,6 +29,16 @@ lei_ok(qw(q s:boolean -f mboxrd), \'blob accessible after import');
lei_ok(qw(import -F eml), 't/data/message_embed.eml',
\'import single file by path');
+lei_ok(qw(q m:testmessage@example.com));
+is($lei_out, "[null]\n", 'no results, yet');
+my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
+my $eml = eml_load('t/utf8.eml');
+my $in = 'From x@y Fri Oct 2 00:00:00 1993'."\n".$eml->as_string;
+lei_ok([qw(import -F eml -)], undef, { %$lei_opt, 0 => \$in });
+lei_ok(qw(q m:testmessage@example.com));
+is(json_utf8->decode($lei_out)->[0]->{'blob'}, $oid,
+ 'got expected OID w/o From');
+
my $str = <<'';
From: a@b
Message-ID: <x@y>
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 8/8] lei import: ignore Status headers in "eml" messages
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
` (6 preceding siblings ...)
2021-03-22 7:54 ` [PATCH 7/8] lei_input: drop "From " line on single "eml" (message/rfc822) Eric Wong
@ 2021-03-22 7:54 ` Eric Wong
7 siblings, 0 replies; 9+ messages in thread
From: Eric Wong @ 2021-03-22 7:54 UTC (permalink / raw)
To: meta
Those headers only have meaning with for mboxes. Don't surprise
users by trying to make sense of a header that is defined for mboxes.
It's possible to send email with (Status|X-Status) headers and
have those headers show up in a recipient's IMAP mailbox.
This was bad because an IMAP user may want to import a single
message through their MUA and pipe its contents to "lei import"
without noticing a mischievious sender stuck "X-Status: F"
(flagged/important) in there.
---
lib/PublicInbox/LeiImport.pm | 14 +++++++-------
t/lei-import.t | 27 ++++++++++++++++++++++-----
2 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 767cae60..9ad2ff12 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -10,19 +10,19 @@ use PublicInbox::Eml;
use PublicInbox::PktOp qw(pkt_do);
sub eml_cb { # used by PublicInbox::LeiInput::input_fh
- my ($self, $eml) = @_;
- my $vmd;
- if ($self->{-import_kw}) { # FIXME
- my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
- $vmd = { kw => $kw } if scalar(@$kw);
- }
+ my ($self, $eml, $vmd) = @_;
my $xoids = $self->{lei}->{ale}->xoids_for($eml);
$self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
}
sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh
my ($eml, $self) = @_;
- eml_cb($self, $eml);
+ my $vmd;
+ if ($self->{-import_kw}) {
+ my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
+ $vmd = { kw => $kw } if scalar(@$kw);
+ }
+ eml_cb($self, $eml, $vmd);
}
sub import_done_wait { # dwaitpid callback
diff --git a/t/lei-import.t b/t/lei-import.t
index eef1e4e2..a697d756 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -39,27 +39,44 @@ lei_ok(qw(q m:testmessage@example.com));
is(json_utf8->decode($lei_out)->[0]->{'blob'}, $oid,
'got expected OID w/o From');
-my $str = <<'';
+my $eml_str = <<'';
From: a@b
Message-ID: <x@y>
Status: RO
-my $opt = { %$lei_opt, 0 => \$str };
+my $opt = { %$lei_opt, 0 => \$eml_str };
lei_ok([qw(import -F eml -)], undef, $opt,
\'import single file with keywords from stdin');
lei_ok(qw(q m:x@y));
my $res = json_utf8->decode($lei_out);
is($res->[1], undef, 'only one result');
-is_deeply($res->[0]->{kw}, ['seen'], "message `seen' keyword set");
+is($res->[0]->{'m'}, 'x@y', 'got expected message');
+is($res->[0]->{kw}, undef, 'Status ignored for eml');
+lei_ok(qw(q -f mboxrd m:x@y));
+unlike($lei_out, qr/^Status:/, 'no Status: in imported message');
-$str =~ tr/x/v/; # v@y
-lei_ok([qw(import --no-kw -F eml -)], undef, $opt,
+
+$eml->header_set('Message-ID', '<v@y>');
+$eml->header_set('Status', 'RO');
+$in = 'From v@y Fri Oct 2 00:00:00 1993'."\n".$eml->as_string;
+lei_ok([qw(import --no-kw -F mboxrd -)], undef, { %$lei_opt, 0 => \$in },
\'import single file with --no-kw from stdin');
lei(qw(q m:v@y));
$res = json_utf8->decode($lei_out);
is($res->[1], undef, 'only one result');
+is($res->[0]->{'m'}, 'v@y', 'got expected message');
is($res->[0]->{kw}, undef, 'no keywords set');
+$eml->header_set('Message-ID', '<k@y>');
+$in = 'From k@y Fri Oct 2 00:00:00 1993'."\n".$eml->as_string;
+lei_ok([qw(import -F mboxrd -)], undef, { %$lei_opt, 0 => \$in },
+ \'import single file with --kw (default) from stdin');
+lei(qw(q m:k@y));
+$res = json_utf8->decode($lei_out);
+is($res->[1], undef, 'only one result');
+is($res->[0]->{'m'}, 'k@y', 'got expected message');
+is_deeply($res->[0]->{kw}, ['seen'], "`seen' keywords set");
+
# see t/lei_to_mail.t for "import -F mbox*"
});
done_testing;
^ permalink raw reply related [flat|nested] 9+ messages in thread