user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 4/5] lei_input: more common code between <mark|convert|import>
Date: Tue, 23 Mar 2021 11:48:07 +0000	[thread overview]
Message-ID: <20210323114808.7605-5-e@80x24.org> (raw)
In-Reply-To: <20210323114808.7605-1-e@80x24.org>

"lei convert" is actually a bit of an odd one out, since
it uses lei2mail for auth, unlike the others.
---
 lib/PublicInbox/LeiConvert.pm | 47 ++++++----------------
 lib/PublicInbox/LeiImport.pm  | 74 +++++++++--------------------------
 lib/PublicInbox/LeiInput.pm   | 45 +++++++++++++++++++--
 lib/PublicInbox/LeiMark.pm    | 57 +++++----------------------
 4 files changed, 80 insertions(+), 143 deletions(-)

diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 49e2b7af..bc86fe25 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -6,64 +6,39 @@ package PublicInbox::LeiConvert;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
-use PublicInbox::Eml;
-use PublicInbox::LeiStore;
 use PublicInbox::LeiOverview;
 
-sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh
+# /^input_/ subs are used by PublicInbox::LeiInput
+
+sub input_mbox_cb { # MboxReader callback
 	my ($eml, $self) = @_;
 	my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
 	$eml->header_set($_) for qw(Status X-Status);
 	$self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
-sub eml_cb { # used by PublicInbox::LeiInput::input_fh
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml) = @_;
-	$self->{wcb}->(undef, { kw => [] }, $eml);
+	$self->{wcb}->(undef, {}, $eml);
 }
 
-sub net_cb { # callback for ->imap_each, ->nntp_each
+sub input_net_cb { # callback for ->imap_each, ->nntp_each
 	my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored
 	$self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
-sub mdir_cb {
-	my ($f, $kw, $eml, $self) = @_;
+sub input_maildir_cb {
+	my (undef, $kw, $eml, $self) = @_; # $_[0] $filename ignored
 	$self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
 sub do_convert { # via wq_do
 	my ($self) = @_;
-	my $lei = $self->{lei};
-	my $ifmt = $lei->{opt}->{'in-format'};
-	if (my $stdin = delete $self->{0}) {
-		$self->input_fh($ifmt, $stdin, '<stdin>');
-	}
+	$self->input_stdin;
 	for my $input (@{$self->{inputs}}) {
-		my $ifmt = lc($ifmt // '');
-		if ($input =~ m!\Aimaps?://!) {
-			$lei->{net}->imap_each($input, \&net_cb, $self);
-			next;
-		} elsif ($input =~ m!\A(?:nntps?|s?news)://!) {
-			$lei->{net}->nntp_each($input, \&net_cb, $self);
-			next;
-		} elsif ($input =~ s!\A([a-z0-9]+):!!i) {
-			$ifmt = lc $1;
-		}
-		if (-f $input) {
-			my $m = $lei->{opt}->{'lock'} //
-					($ifmt eq 'eml' ? ['none'] :
-					PublicInbox::MboxLock->defaults);
-			my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
-			$self->input_fh($ifmt, $mbl->{fh}, $input);
-		} elsif (-d _) {
-			PublicInbox::MdirReader::maildir_each_eml($input,
-							\&mdir_cb, $self);
-		} else {
-			die "BUG: $input unhandled"; # should've failed earlier
-		}
+		$self->input_path_url($input);
 	}
-	delete $lei->{1};
+	delete $self->{lei}->{1};
 	delete $self->{wcb}; # commit
 }
 
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 21af28a3..991c84f2 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -6,23 +6,33 @@ package PublicInbox::LeiImport;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
-use PublicInbox::Eml;
-use PublicInbox::PktOp qw(pkt_do);
 
-sub eml_cb { # used by PublicInbox::LeiInput::input_fh
+# /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml, $vmd) = @_;
 	my $xoids = $self->{lei}->{ale}->xoids_for($eml);
 	$self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
 }
 
-sub mbox_cb { # MboxReader callback used by PublicInbox::LeiInput::input_fh
+sub input_mbox_cb { # MboxReader callback
 	my ($eml, $self) = @_;
 	my $vmd;
 	if ($self->{-import_kw}) {
 		my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
 		$vmd = { kw => $kw } if scalar(@$kw);
 	}
-	eml_cb($self, $eml, $vmd);
+	input_eml_cb($self, $eml, $vmd);
+}
+
+sub input_maildir_cb { # maildir_each_eml cb
+	my ($f, $kw, $eml, $self) = @_;
+	input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef);
+}
+
+sub input_net_cb { # imap_each, nntp_each cb
+	my ($url, $uid, $kw, $eml, $self) = @_;
+	input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef);
 }
 
 sub import_done_wait { # dwaitpid callback
@@ -43,7 +53,7 @@ sub import_done { # EOF callback for main daemon
 sub net_merge_complete { # callback used by LeiAuth
 	my ($self) = @_;
 	for my $input (@{$self->{inputs}}) {
-		$self->wq_io_do('import_path_url', [], $input);
+		$self->wq_io_do('input_path_url', [], $input);
 	}
 	$self->wq_close(1);
 }
@@ -63,7 +73,8 @@ sub import_start {
 	$lei->{auth}->op_merge($ops, $self) if $lei->{auth};
 	$self->{-wq_nr_workers} = $j // 1; # locked
 	my $op = $lei->workers_start($self, 'lei_import', undef, $ops);
-	$self->wq_io_do('import_stdin', []) if $self->{0};
+	$lei->{imp} = $self;
+	$self->wq_io_do('input_stdin', []) if $self->{0};
 	net_merge_complete($self) unless $lei->{auth};
 	while ($op && $op->{sock}) { $op->event_step }
 }
@@ -78,55 +89,6 @@ sub lei_import { # the main "lei import" method
 	import_start($lei);
 }
 
-sub _import_maildir { # maildir_each_eml cb
-	my ($f, $kw, $eml, $sto, $set_kw) = @_;
-	$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ());
-}
-
-sub _import_net { # imap_each, nntp_each cb
-	my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_;
-	$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw } : ());
-}
-
-sub import_path_url {
-	my ($self, $input) = @_;
-	my $lei = $self->{lei};
-	my $ifmt = lc($lei->{opt}->{'in-format'} // '');
-	# TODO auto-detect?
-	if ($input =~ m!\Aimaps?://!i) {
-		$lei->{net}->imap_each($input, \&_import_net, $lei->{sto},
-					$self->{-import_kw});
-		return;
-	} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
-		$lei->{net}->nntp_each($input, \&_import_net, $lei->{sto}, 0);
-		return;
-	} elsif ($input =~ s!\A([a-z0-9]+):!!i) {
-		$ifmt = lc $1;
-	}
-	if (-f $input) {
-		my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] :
-				PublicInbox::MboxLock->defaults);
-		my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
-		$self->input_fh($ifmt, $mbl->{fh}, $input);
-	} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
-		return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
-$input appears to a be a maildir, not $ifmt
-EOM
-		PublicInbox::MdirReader::maildir_each_eml($input,
-					\&_import_maildir,
-					$lei->{sto}, $self->{-import_kw});
-	} else {
-		$lei->fail("$input unsupported (TODO)");
-	}
-}
-
-sub import_stdin {
-	my ($self) = @_;
-	my $lei = $self->{lei};
-	my $in = delete $self->{0};
-	$self->input_fh($lei->{opt}->{'in-format'}, $in, '<stdin>');
-}
-
 no warnings 'once';
 *ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 2a4968d4..b059ecda 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -24,10 +24,11 @@ sub check_input_format ($;$) {
 }
 
 # import a single file handle of $name
-# Subclass must define ->eml_cb and ->mbox_cb
+# Subclass must define ->input_eml_cb and ->input_mbox_cb
 sub input_fh {
 	my ($self, $ifmt, $fh, $name, @args) = @_;
 	if ($ifmt eq 'eml') {
+		require PublicInbox::Eml;
 		my $buf = do { local $/; <$fh> } //
 			return $self->{lei}->child_error(1 << 8, <<"");
 error reading $name: $!
@@ -36,12 +37,50 @@ error reading $name: $!
 		# but no Content-Length or "From " escaping.
 		# "git format-patch" also generates such files by default.
 		$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-		$self->eml_cb(PublicInbox::Eml->new(\$buf), @args);
+		$self->input_eml_cb(PublicInbox::Eml->new(\$buf), @args);
 	} else {
 		# prepare_inputs already validated $ifmt
 		my $cb = PublicInbox::MboxReader->reads($ifmt) //
 				die "BUG: bad fmt=$ifmt";
-		$cb->(undef, $fh, $self->can('mbox_cb'), $self, @args);
+		$cb->(undef, $fh, $self->can('input_mbox_cb'), $self, @args);
+	}
+}
+
+sub input_stdin {
+	my ($self) = @_;
+	my $in = delete $self->{0} or return;
+	$self->input_fh($self->{lei}->{opt}->{'in-format'}, $in, '<stdin>');
+}
+
+sub input_path_url {
+	my ($self, $input, @args) = @_;
+	my $lei = $self->{lei};
+	my $ifmt = lc($lei->{opt}->{'in-format'} // '');
+	# TODO auto-detect?
+	if ($input =~ m!\Aimaps?://!i) {
+		$lei->{net}->imap_each($input, $self->can('input_net_cb'),
+					$self, @args);
+		return;
+	} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
+		$lei->{net}->nntp_each($input, $self->can('input_net_cb'),
+					$self, @args);
+		return;
+	}
+	$input =~ s!\A([a-z0-9]+):!!i and $ifmt = lc($1);
+	if (-f $input) {
+		my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] :
+				PublicInbox::MboxLock->defaults);
+		my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
+		$self->input_fh($ifmt, $mbl->{fh}, $input, @args);
+	} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
+		return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
+$input appears to a be a maildir, not $ifmt
+EOM
+		PublicInbox::MdirReader::maildir_each_eml($input,
+					$self->can('input_maildir_cb'),
+					$self, @args);
+	} else {
+		$lei->fail("$input unsupported (TODO)");
 	}
 }
 
diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm
index 7b50aa51..3b5e6c2c 100644
--- a/lib/PublicInbox/LeiMark.pm
+++ b/lib/PublicInbox/LeiMark.pm
@@ -6,8 +6,6 @@ package PublicInbox::LeiMark;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
-use PublicInbox::Eml;
-use PublicInbox::PktOp qw(pkt_do);
 
 # JMAP RFC 8621 4.1.1
 my @KW = (qw(seen answered flagged draft), # system
@@ -34,7 +32,6 @@ my %ERR = (
 `$kw' is not one of: `seen', `flagged', `answered', `draft'
 `junk', `notjunk', `phishing' or `forwarded'
 EOM
-
 	}
 );
 
@@ -60,7 +57,7 @@ sub vmd_mod_extract {
 	$vmd_mod;
 }
 
-sub eml_cb { # used by PublicInbox::LeiInput::input_fh
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml) = @_;
 	if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) {
 		$self->{lei}->{sto}->ipc_do('update_xvmd', $xoids,
@@ -70,7 +67,7 @@ sub eml_cb { # used by PublicInbox::LeiInput::input_fh
 	}
 }
 
-sub mbox_cb { eml_cb($_[1], $_[0]) } # used by PublicInbox::LeiInput::input_fh
+sub input_mbox_cb { input_eml_cb($_[1], $_[0]) }
 
 sub mark_done_wait { # dwaitpid callback
 	my ($arg, $pid) = @_;
@@ -90,19 +87,19 @@ sub mark_done { # EOF callback for main daemon
 sub net_merge_complete { # callback used by LeiAuth
 	my ($self) = @_;
 	for my $input (@{$self->{inputs}}) {
-		$self->wq_io_do('mark_path_url', [], $input);
+		$self->wq_io_do('input_path_url', [], $input);
 	}
 	$self->wq_close(1);
 }
 
-sub _mark_maildir { # maildir_each_eml cb
+sub input_maildir_cb { # maildir_each_eml cb
 	my ($f, $kw, $eml, $self) = @_;
-	eml_cb($self, $eml);
+	input_eml_cb($self, $eml);
 }
 
-sub _mark_net { # imap_each, nntp_each cb
+sub input_net_cb { # imap_each, nntp_each cb
 	my ($url, $uid, $kw, $eml, $self) = @_;
-	eml_cb($self, $eml)
+	input_eml_cb($self, $eml);
 }
 
 sub lei_mark { # the "lei mark" method
@@ -120,48 +117,12 @@ sub lei_mark { # the "lei mark" method
 	$lei->{auth}->op_merge($ops, $self) if $lei->{auth};
 	$self->{vmd_mod} = $vmd_mod;
 	my $op = $lei->workers_start($self, 'lei_mark', 1, $ops);
-	$self->wq_io_do('mark_stdin', []) if $self->{0};
+	$lei->{mark} = $self;
+	$self->wq_io_do('input_stdin', []) if $self->{0};
 	net_merge_complete($self) unless $lei->{auth};
 	while ($op && $op->{sock}) { $op->event_step }
 }
 
-sub mark_path_url {
-	my ($self, $input) = @_;
-	my $lei = $self->{lei};
-	my $ifmt = lc($lei->{opt}->{'in-format'} // '');
-	# TODO auto-detect?
-	if ($input =~ m!\Aimaps?://!i) {
-		$lei->{net}->imap_each($input, \&_mark_net, $self);
-		return;
-	} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
-		$lei->{net}->nntp_each($input, \&_mark_net, $self);
-		return;
-	} elsif ($input =~ s!\A([a-z0-9]+):!!i) {
-		$ifmt = lc $1;
-	}
-	if (-f $input) {
-		my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] :
-				PublicInbox::MboxLock->defaults);
-		my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
-		$self->input_fh($ifmt, $mbl->{fh}, $input);
-	} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
-		return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
-$input appears to a be a maildir, not $ifmt
-EOM
-		PublicInbox::MdirReader::maildir_each_eml($input,
-					\&_mark_maildir, $self);
-	} else {
-		$lei->fail("$input unsupported (TODO)");
-	}
-}
-
-sub mark_stdin {
-	my ($self) = @_;
-	my $lei = $self->{lei};
-	my $in = delete $self->{0};
-	$self->input_fh($lei->{opt}->{'in-format'}, $in, '<stdin>');
-}
-
 sub note_missing {
 	my ($self) = @_;
 	$self->{lei}->child_error(1 << 8) if $self->{missing};

  parent reply	other threads:[~2021-03-23 11:48 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-23 11:48 [PATCH 0/5] lei: more input + worker-related stuff Eric Wong
2021-03-23 11:48 ` [PATCH 1/5] net_reader: nntp_each: pass keywords as `undef' Eric Wong
2021-03-23 11:48 ` [PATCH 2/5] test_common: check lei/errors.log Eric Wong
2021-03-23 11:48 ` [PATCH 3/5] lei: persistent workers (lei_store) run in / Eric Wong
2021-03-23 11:48 ` Eric Wong [this message]
2021-03-23 11:48 ` [PATCH 5/5] lei: improve management around short-lived workers Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210323114808.7605-5-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 4/5] lei_input: more common code between <mark|convert|import>' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git