From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 02/13] introduce InboxWritable class
Date: Thu, 22 Mar 2018 09:40:04 +0000 [thread overview]
Message-ID: <20180322094015.14422-3-e@80x24.org> (raw)
In-Reply-To: <20180322094015.14422-1-e@80x24.org>
This code will be shared with future mass-import tools.
---
MANIFEST | 1 +
lib/PublicInbox/InboxWritable.pm | 57 ++++++++++++++++++++++++++++++++++++++++
lib/PublicInbox/WatchMaildir.pm | 50 +++++++----------------------------
3 files changed, 67 insertions(+), 41 deletions(-)
create mode 100644 lib/PublicInbox/InboxWritable.pm
diff --git a/MANIFEST b/MANIFEST
index 4346cd9..567148a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -66,6 +66,7 @@ lib/PublicInbox/HTTPD/Async.pm
lib/PublicInbox/Hval.pm
lib/PublicInbox/Import.pm
lib/PublicInbox/Inbox.pm
+lib/PublicInbox/InboxWritable.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
new file mode 100644
index 0000000..0a976ea
--- /dev/null
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -0,0 +1,57 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Extends read-only Inbox for writing
+package PublicInbox::InboxWritable;
+use strict;
+use warnings;
+use base qw(PublicInbox::Inbox);
+use PublicInbox::Import;
+
+sub new { # re-bless an existing inbox object ($ibx) into $class, in place
+	my ($class, $ibx) = @_;
+	return bless($ibx, $class);
+}
+
+# Return this inbox's importer (cached in {-importer}); dies on failure.
+sub importer {
+	my ($self, $parallel) = @_;
+	# "do", not "eval": errors must reach the caller, not become undef
+	$self->{-importer} ||= do {
+		my $v = $self->{version} || 1;
+		if ($v == 2) {
+			eval { require PublicInbox::V2Writable };
+			die "v2 not supported: $@\n" if $@;
+			my $v2w = PublicInbox::V2Writable->new($self);
+			$v2w->{parallel} = $parallel;
+			$v2w;
+		} elsif ($v == 1) {
+			my @id = @$self{qw(name -primary_address)};
+			PublicInbox::Import->new($self->git, @id, $self);
+		} else {
+			die "unsupported inbox version: $v\n";
+		}
+	};
+}
+
+# Instantiate the filter class named by $self->{filter}, or return undef
+sub filter {
+	my ($self) = @_;
+	my $spec = $self->{filter};
+	return undef unless $spec && $spec =~ /::/;
+
+	# basic whitespace splitting only; words after the class name
+	# replace the default (-inbox => $self) constructor arguments
+	my ($mod, @args) = ($spec, -inbox => $self);
+	($mod, @args) = split(/\s+/, $spec) if $spec =~ /\s+/;
+
+	eval "require $mod";
+	if ($@) { # report the load failure, then give up with undef
+		warn $@;
+		return undef;
+	}
+	# e.g: PublicInbox::Filter::Vger->new(@args)
+	$mod->new(@args);
+}
+
+1;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index e28e602..b165a60 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -11,6 +11,7 @@ use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::MDA;
use PublicInbox::Spawn qw(spawn);
+use PublicInbox::InboxWritable;
use File::Temp qw//;
sub new {
@@ -50,6 +51,10 @@ sub new {
$spamcheck = undef;
}
}
+
+ # need to make all inboxes writable for spam removal:
+ $config->each_inbox(sub { PublicInbox::InboxWritable->new($_[0]) });
+
foreach $k (keys %$config) {
$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
my $name = $1;
@@ -118,7 +123,7 @@ sub _remove_spam {
eval {
my $im = _importer_for($self, $ibx);
$im->remove($mime, 'spam');
- if (my $scrub = _scrubber_for($ibx)) {
+ if (my $scrub = $ibx->filter) {
my $scrubbed = $scrub->scrub($mime) or return;
$scrubbed == 100 and return;
$im->remove($scrubbed, 'spam');
@@ -160,7 +165,7 @@ sub _try_path {
my $v = $mime->header_obj->header_raw($wm->[0]);
return unless ($v && $v =~ $wm->[1]);
}
- if (my $scrub = _scrubber_for($inbox)) {
+ if (my $scrub = $inbox->filter) {
my $ret = $scrub->scrub($mime) or return;
$ret == 100 and return;
$mime = $ret;
@@ -253,25 +258,8 @@ sub _path_to_mime {
}
sub _importer_for {
- my ($self, $inbox) = @_;
- my $im = $inbox->{-import} ||= eval {
- my $v = $inbox->{version} || 1;
- if ($v == 2) {
- eval { require PublicInbox::V2Writable };
- die "v2 not supported: $@\n" if $@;
- my $v2w = PublicInbox::V2Writable->new($inbox);
- $v2w->{parallel} = 0;
- $v2w;
- } elsif ($v == 1) {
- my $git = $inbox->git;
- my $name = $inbox->{name};
- my $addr = $inbox->{-primary_address};
- PublicInbox::Import->new($git, $name, $addr, $inbox);
- } else {
- die "unsupported inbox version: $v\n";
- }
- };
-
+ my ($self, $ibx) = @_;
+ my $im = $ibx->importer(0);
my $importers = $self->{importers};
if (scalar(keys(%$importers)) > 2) {
delete $importers->{"$im"};
@@ -281,26 +269,6 @@ sub _importer_for {
$importers->{"$im"} = $im;
}
-sub _scrubber_for {
- my ($inbox) = @_;
- my $f = $inbox->{filter};
- if ($f && $f =~ /::/) {
- my @args = (-inbox => $inbox);
- # basic line splitting, only
- # Perhaps we can have proper quote splitting one day...
- ($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
-
- eval "require $f";
- if ($@) {
- warn $@;
- } else {
- # e.g: PublicInbox::Filter::Vger->new(@args)
- return $f->new(@args);
- }
- }
- undef;
-}
-
sub _spamcheck_cb {
my ($sc) = @_;
sub {
--
EW
next prev parent reply other threads:[~2018-03-22 9:40 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-22 9:40 [PATCH 00/13] reindexing, feeds, date fixes Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 01/13] content_id: do not take Message-Id into account Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` Eric Wong (Contractor, The Linux Foundation) [this message]
2018-03-22 9:40 ` [PATCH 03/13] import: discard all the same headers as MDA Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 04/13] InboxWritable: add mbox/maildir parsing + import logic Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 05/13] use both Date: and Received: times Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 06/13] msgmap: add tmp_clone to create an anonymous copy Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 07/13] fix syntax warnings Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 08/13] v2writable: support reindexing Xapian Eric Wong (Contractor, The Linux Foundation)
2018-03-26 20:08 ` Eric Wong
2018-03-22 9:40 ` [PATCH 09/13] t/altid.t: extra tests for mid_set Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 10/13] v2writable: add NNTP article number regeneration support Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 11/13] v2writable: clarify header cleanups Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 12/13] v2writable: DEBUG_DIFF respects $TMPDIR Eric Wong (Contractor, The Linux Foundation)
2018-03-22 9:40 ` [PATCH 13/13] feed: $INBOX/new.atom endpoint supports v2 inboxes Eric Wong (Contractor, The Linux Foundation)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180322094015.14422-3-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).