user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 02/13] introduce InboxWritable class
Date: Thu, 22 Mar 2018 09:40:04 +0000	[thread overview]
Message-ID: <20180322094015.14422-3-e@80x24.org> (raw)
In-Reply-To: <20180322094015.14422-1-e@80x24.org>

This code will be shared with future mass-import tools.
---
 MANIFEST                         |  1 +
 lib/PublicInbox/InboxWritable.pm | 57 ++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WatchMaildir.pm  | 50 +++++++----------------------------
 3 files changed, 67 insertions(+), 41 deletions(-)
 create mode 100644 lib/PublicInbox/InboxWritable.pm

diff --git a/MANIFEST b/MANIFEST
index 4346cd9..567148a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -66,6 +66,7 @@ lib/PublicInbox/HTTPD/Async.pm
 lib/PublicInbox/Hval.pm
 lib/PublicInbox/Import.pm
 lib/PublicInbox/Inbox.pm
+lib/PublicInbox/InboxWritable.pm
 lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
new file mode 100644
index 0000000..0a976ea
--- /dev/null
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -0,0 +1,57 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Extends read-only Inbox for writing
+package PublicInbox::InboxWritable;
+use strict;
+use warnings;
+use base qw(PublicInbox::Inbox);
+use PublicInbox::Import;
+
+sub new { # wraps an already-constructed inbox reference; builds nothing new
+	my ($class, $ibx) = @_;
+	bless $ibx, $class; # re-blesses $ibx in place (no copy) and returns it
+}
+
+sub importer { # returns the importer cached in $self->{-importer}, creating it on first use
+	my ($self, $parallel) = @_;
+	$self->{-importer} ||= eval { # a die in here yields undef; $@ is not checked by this sub
+		my $v = $self->{version} || 1; # unset/false version is treated as v1
+		if ($v == 2) {
+			eval { require PublicInbox::V2Writable }; # loaded lazily, v2 only
+			die "v2 not supported: $@\n" if $@;
+			my $v2w = PublicInbox::V2Writable->new($self);
+			$v2w->{parallel} = $parallel; # only applied when the importer is first created
+			$v2w;
+		} elsif ($v == 1) {
+			my $git = $self->git;
+			my $name = $self->{name};
+			my $addr = $self->{-primary_address};
+			PublicInbox::Import->new($git, $name, $addr, $self); # $parallel is unused for v1
+		} else {
+			die "unsupported inbox version: $v\n";
+		}
+	}
+}
+
+sub filter { # returns a new filter object, or undef if none is set or it fails to load
+	my ($self) = @_;
+	my $f = $self->{filter};
+	if ($f && $f =~ /::/) { # only values containing "::" are treated as class names
+		my @args = (-inbox => $self); # default constructor arguments
+		# Basic whitespace splitting only (no quote handling yet): the first
+		# word becomes the class name, the rest REPLACE the default @args.
+		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
+
+		eval "require $f"; # NOTE(review): $f is interpolated unvalidated; presumably trusted config - confirm
+		if ($@) {
+			warn $@; # load failure is non-fatal; falls through to return undef
+		} else {
+			# e.g: PublicInbox::Filter::Vger->new(@args)
+			return $f->new(@args);
+		}
+	}
+	undef;
+}
+
+1;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index e28e602..b165a60 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -11,6 +11,7 @@ use PublicInbox::Git;
 use PublicInbox::Import;
 use PublicInbox::MDA;
 use PublicInbox::Spawn qw(spawn);
+use PublicInbox::InboxWritable;
 use File::Temp qw//;
 
 sub new {
@@ -50,6 +51,10 @@ sub new {
 			$spamcheck = undef;
 		}
 	}
+
+	# need to make all inboxes writable for spam removal:
+	$config->each_inbox(sub { PublicInbox::InboxWritable->new($_[0]) });
+
 	foreach $k (keys %$config) {
 		$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
 		my $name = $1;
@@ -118,7 +123,7 @@ sub _remove_spam {
 		eval {
 			my $im = _importer_for($self, $ibx);
 			$im->remove($mime, 'spam');
-			if (my $scrub = _scrubber_for($ibx)) {
+			if (my $scrub = $ibx->filter) {
 				my $scrubbed = $scrub->scrub($mime) or return;
 				$scrubbed == 100 and return;
 				$im->remove($scrubbed, 'spam');
@@ -160,7 +165,7 @@ sub _try_path {
 		my $v = $mime->header_obj->header_raw($wm->[0]);
 		return unless ($v && $v =~ $wm->[1]);
 	}
-	if (my $scrub = _scrubber_for($inbox)) {
+	if (my $scrub = $inbox->filter) {
 		my $ret = $scrub->scrub($mime) or return;
 		$ret == 100 and return;
 		$mime = $ret;
@@ -253,25 +258,8 @@ sub _path_to_mime {
 }
 
 sub _importer_for {
-	my ($self, $inbox) = @_;
-	my $im = $inbox->{-import} ||= eval {
-		my $v = $inbox->{version} || 1;
-		if ($v == 2) {
-			eval { require PublicInbox::V2Writable };
-			die "v2 not supported: $@\n" if $@;
-			my $v2w = PublicInbox::V2Writable->new($inbox);
-			$v2w->{parallel} = 0;
-			$v2w;
-		} elsif ($v == 1) {
-			my $git = $inbox->git;
-			my $name = $inbox->{name};
-			my $addr = $inbox->{-primary_address};
-			PublicInbox::Import->new($git, $name, $addr, $inbox);
-		} else {
-			die "unsupported inbox version: $v\n";
-		}
-	};
-
+	my ($self, $ibx) = @_;
+	my $im = $ibx->importer(0);
 	my $importers = $self->{importers};
 	if (scalar(keys(%$importers)) > 2) {
 		delete $importers->{"$im"};
@@ -281,26 +269,6 @@ sub _importer_for {
 	$importers->{"$im"} = $im;
 }
 
-sub _scrubber_for {
-	my ($inbox) = @_;
-	my $f = $inbox->{filter};
-	if ($f && $f =~ /::/) {
-		my @args = (-inbox => $inbox);
-		# basic line splitting, only
-		# Perhaps we can have proper quote splitting one day...
-		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
-
-		eval "require $f";
-		if ($@) {
-			warn $@;
-		} else {
-			# e.g: PublicInbox::Filter::Vger->new(@args)
-			return $f->new(@args);
-		}
-	}
-	undef;
-}
-
 sub _spamcheck_cb {
 	my ($sc) = @_;
 	sub {
-- 
EW


  parent reply	other threads:[~2018-03-22  9:40 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-22  9:40 [PATCH 00/13] reindexing, feeds, date fixes Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 01/13] content_id: do not take Message-Id into account Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-03-22  9:40 ` [PATCH 03/13] import: discard all the same headers as MDA Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 04/13] InboxWritable: add mbox/maildir parsing + import logic Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 05/13] use both Date: and Received: times Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 06/13] msgmap: add tmp_clone to create an anonymous copy Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 07/13] fix syntax warnings Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 08/13] v2writable: support reindexing Xapian Eric Wong (Contractor, The Linux Foundation)
2018-03-26 20:08   ` Eric Wong
2018-03-22  9:40 ` [PATCH 09/13] t/altid.t: extra tests for mid_set Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 10/13] v2writable: add NNTP article number regeneration support Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 11/13] v2writable: clarify header cleanups Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 12/13] v2writable: DEBUG_DIFF respects $TMPDIR Eric Wong (Contractor, The Linux Foundation)
2018-03-22  9:40 ` [PATCH 13/13] feed: $INBOX/new.atom endpoint supports v2 inboxes Eric Wong (Contractor, The Linux Foundation)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180322094015.14422-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).