From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 74AFD1FB0F for ; Mon, 22 Mar 2021 07:54:03 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 6/8] lei_input: common filehandle reader for eml + mbox Date: Mon, 22 Mar 2021 07:54:00 +0000 Message-Id: <20210322075402.27834-7-e@80x24.org> In-Reply-To: <20210322075402.27834-1-e@80x24.org> References: <20210322075402.27834-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This improves code regularity, and will let us deal with the "RFC822" messages with "From " line that mutt pipes to. --- lib/PublicInbox/LeiConvert.pm | 30 +++++++-------------- lib/PublicInbox/LeiImport.pm | 50 ++++++++++++----------------------- lib/PublicInbox/LeiInput.pm | 17 ++++++++++++ 3 files changed, 44 insertions(+), 53 deletions(-) diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm index 8685c194..51a233bd 100644 --- a/lib/PublicInbox/LeiConvert.pm +++ b/lib/PublicInbox/LeiConvert.pm @@ -10,13 +10,18 @@ use PublicInbox::Eml; use PublicInbox::LeiStore; use PublicInbox::LeiOverview; -sub mbox_cb { +sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh my ($eml, $self) = @_; my $kw = PublicInbox::MboxReader::mbox_keywords($eml); $eml->header_set($_) for qw(Status X-Status); $self->{wcb}->(undef, { kw => $kw }, $eml); } +sub eml_cb { # used by PublicInbox::LeiInput::input_fh + my ($self, $eml) = @_; + $self->{wcb}->(undef, { kw => [] }, $eml); +} + sub net_cb { # callback for ->imap_each, ->nntp_each my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored $self->{wcb}->(undef, { kw => $kw }, $eml); @@ -27,30 +32,15 @@ sub mdir_cb { 
$self->{wcb}->(undef, { kw => $kw }, $eml); } -sub convert_fh ($$$$) { - my ($self, $ifmt, $fh, $name) = @_; - if ($ifmt eq 'eml') { - my $buf = do { local $/; <$fh> } // - return $self->{lei}->child_error(1 << 8, <<""); -error reading $name: $! - - my $eml = PublicInbox::Eml->new(\$buf); - $self->{wcb}->(undef, { kw => [] }, $eml); - } else { - PublicInbox::MboxReader->$ifmt($fh, \&mbox_cb, $self); - } -} - sub do_convert { # via wq_do my ($self) = @_; my $lei = $self->{lei}; - my $in_fmt = $lei->{opt}->{'in-format'}; - my $mics; + my $ifmt = $lei->{opt}->{'in-format'}; if (my $stdin = delete $self->{0}) { - convert_fh($self, $in_fmt, $stdin, ''); + $self->input_fh($ifmt, $stdin, ''); } for my $input (@{$self->{inputs}}) { - my $ifmt = lc($in_fmt // ''); + my $ifmt = lc($ifmt // ''); if ($input =~ m!\Aimaps?://!) { $lei->{net}->imap_each($input, \&net_cb, $self); next; @@ -65,7 +55,7 @@ sub do_convert { # via wq_do ($ifmt eq 'eml' ? ['none'] : PublicInbox::MboxLock->defaults); my $mbl = PublicInbox::MboxLock->acq($input, 0, $m); - convert_fh($self, $ifmt, $mbl->{fh}, $input); + $self->input_fh($ifmt, $mbl->{fh}, $input); } elsif (-d _) { PublicInbox::MdirReader::maildir_each_eml($input, \&mdir_cb, $self); diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index e587ada8..767cae60 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -9,15 +9,20 @@ use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::Eml; use PublicInbox::PktOp qw(pkt_do); -sub _import_eml { # MboxReader callback - my ($eml, $lei, $mbox_keywords) = @_; +sub eml_cb { # used by PublicInbox::LeiInput::input_fh + my ($self, $eml) = @_; my $vmd; - if ($mbox_keywords) { - my $kw = $mbox_keywords->($eml); + if ($self->{-import_kw}) { # FIXME + my $kw = PublicInbox::MboxReader::mbox_keywords($eml); $vmd = { kw => $kw } if scalar(@$kw); } - my $xoids = $lei->{ale}->xoids_for($eml); - $lei->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids); + my 
$xoids = $self->{lei}->{ale}->xoids_for($eml); + $self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids); +} + +sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh + my ($eml, $self) = @_; + eml_cb($self, $eml); } sub import_done_wait { # dwaitpid callback @@ -46,7 +51,7 @@ sub net_merge_complete { # callback used by LeiAuth sub import_start { my ($lei) = @_; my $self = $lei->{imp}; - $lei->ale; + $lei->ale; # initialize for workers to read my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1; if (my $net = $lei->{net}) { # $j = $net->net_concurrency($j); TODO @@ -67,8 +72,8 @@ sub lei_import { # the main "lei import" method my ($lei, @inputs) = @_; my $sto = $lei->_lei_store(1); $sto->write_prepare($lei); - $lei->{opt}->{kw} //= 1; my $self = $lei->{imp} = bless {}, __PACKAGE__; + $self->{-import_kw} = $lei->{opt}->{kw} // 1; $self->prepare_inputs($lei, \@inputs) or return; import_start($lei); } @@ -83,27 +88,6 @@ sub ipc_atfork_child { undef; } -sub _import_fh { - my ($lei, $fh, $input, $ifmt) = @_; - my $kw = $lei->{opt}->{kw} ? - PublicInbox::MboxReader->can('mbox_keywords') : undef; - eval { - if ($ifmt eq 'eml') { - my $buf = do { local $/; <$fh> } // - return $lei->child_error(1 << 8, <<""); -error reading $input: $! - - my $eml = PublicInbox::Eml->new(\$buf); - _import_eml($eml, $lei, $kw); - } else { # some mbox (->can already checked in call); - my $cb = PublicInbox::MboxReader->reads($ifmt) // - die "BUG: bad fmt=$ifmt"; - $cb->(undef, $fh, \&_import_eml, $lei, $kw); - } - }; - $lei->child_error(1 << 8, "$input: $@") if $@; -} - sub _import_maildir { # maildir_each_eml cb my ($f, $kw, $eml, $sto, $set_kw) = @_; $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ()); @@ -121,7 +105,7 @@ sub import_path_url { # TODO auto-detect? 
if ($input =~ m!\Aimaps?://!i) { $lei->{net}->imap_each($input, \&_import_net, $lei->{sto}, - $lei->{opt}->{kw}); + $self->{-import_kw}); return; } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) { $lei->{net}->nntp_each($input, \&_import_net, $lei->{sto}, 0); @@ -133,14 +117,14 @@ sub import_path_url { my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] : PublicInbox::MboxLock->defaults); my $mbl = PublicInbox::MboxLock->acq($input, 0, $m); - _import_fh($lei, $mbl->{fh}, $input, $ifmt); + $self->input_fh($ifmt, $mbl->{fh}, $input); } elsif (-d _ && (-d "$input/cur" || -d "$input/new")) { return $lei->fail(<{sto}, $lei->{opt}->{kw}); + $lei->{sto}, $self->{-import_kw}); } else { $lei->fail("$input unsupported (TODO)"); } @@ -150,7 +134,7 @@ sub import_stdin { my ($self) = @_; my $lei = $self->{lei}; my $in = delete $self->{0}; - _import_fh($lei, $in, '', $lei->{opt}->{'in-format'}); + $self->input_fh($lei->{opt}->{'in-format'}, $in, ''); } no warnings 'once'; # the following works even when LeiAuth is lazy-loaded diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 776b3151..c62b0893 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -23,6 +23,23 @@ sub check_input_format ($;$) { 1; } +# import a single file handle of $name +# Subclass must define ->eml_cb and ->mbox_cb +sub input_fh { + my ($self, $ifmt, $fh, $name, @args) = @_; + if ($ifmt eq 'eml') { + my $buf = do { local $/; <$fh> } // + return $self->{lei}->child_error(1 << 8, <<""); +error reading $name: $! + + $self->eml_cb(PublicInbox::Eml->new(\$buf), @args); + } else { + # prepare_inputs already validated $ifmt + my $cb = PublicInbox::MboxReader->reads($ifmt) // + die "BUG: bad fmt=$ifmt"; + $cb->(undef, $fh, $self->can('mbox_cb'), $self, @args); + } +} sub prepare_inputs { my ($self, $lei, $inputs) = @_;