diff options
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/InboxWritable.pm | 103 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 8 | ||||
-rw-r--r-- | lib/PublicInbox/WatchMaildir.pm | 20 |
3 files changed, 119 insertions, 12 deletions
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 0a976ea2..82834f08 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -7,6 +7,8 @@ use strict; use warnings; use base qw(PublicInbox::Inbox); use PublicInbox::Import; +use PublicInbox::Filter::Base; +*REJECT = *PublicInbox::Filter::Base::REJECT; sub new { my ($class, $ibx) = @_; @@ -54,4 +56,105 @@ sub filter { undef; } +sub is_maildir_basename ($) { + my ($bn) = @_; + return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; + if ($bn =~ /:2,([A-Z]+)\z/i) { + my $flags = $1; + return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail + } + 1; +} + +sub is_maildir_path ($) { + my ($path) = @_; + my @p = split(m!/+!, $path); + (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; +} + +sub maildir_path_load ($) { + my ($path) = @_; + if (open my $fh, '<', $path) { + local $/; + my $str = <$fh>; + $str or return; + return PublicInbox::MIME->new(\$str); + } elsif ($!{ENOENT}) { + # common with Maildir + return; + } else { + warn "failed to open $path: $!\n"; + return; + } +} + +sub import_maildir { + my ($self, $dir) = @_; + my $im = $self->importer(1); + my $filter = $self->filter; + foreach my $sub (qw(cur new tmp)) { + -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; + } + foreach my $sub (qw(cur new)) { + opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; + while (defined(my $fn = readdir($dh))) { + next unless is_maildir_basename($fn); + my $mime = maildir_file_load("$dir/$fn") or next; + if ($filter) { + my $ret = $filter->scrub($mime) or return; + return if $ret == REJECT(); + $mime = $ret; + } + $im->add($mime); + } + } + $im->done; +} + +# asctime: From example@example.com Fri Jun 23 02:56:55 2000 +my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/; + +sub mb_add ($$$$) { + my ($im, $variant, $filter, $msg) = @_; + $$msg =~ s/(\r?\n)+\z/$1/s; + my $mime = PublicInbox::MIME->new($msg); + if ($variant eq 'mboxrd') { + $$msg =~ s/^>(>*From )/$1/sm; + } elsif ($variant eq 'mboxo') { + $$msg =~ s/^>From /From /sm; + } + if ($filter) { + my $ret = $filter->scrub($mime) or return; + return if $ret == REJECT(); + $mime = $ret; + } + $im->add($mime) +} + +sub import_mbox { + my ($self, $fh, $variant) = @_; + if ($variant !~ /\A(?:mboxrd|mboxo)\z/) { + die "variant must be 'mboxrd' or 'mboxo'\n"; + } + my $im = $self->importer(1); + my $prev = undef; + my $msg = ''; + my $filter = $self->filter; + while (defined(my $l = <$fh>)) { + if ($l =~ /$from_strict/o) { + if (!defined($prev) || $prev =~ /^\r?$/) { + mb_add($im, $variant, $filter, \$msg) if $msg; + $msg = ''; + $prev = $l; + next; + } + warn "W[$.] $l\n"; + } + $prev = $l; + $msg .= $l; + } + mb_add($im, $variant, $filter, \$msg) if $msg; + $im->done; +} + 1; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index dc96b87a..46bfebbd 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -65,6 +65,14 @@ sub new { bless $self, $class; } +sub init_inbox { + my ($self, $parallel) = @_; + $self->{parallel} = $parallel; + $self->idx_init; + $self->git_init(0); + $self->done; +} + # returns undef on duplicate or spam # mimics Import::add and wraps it for v2 sub add { diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index d3ca2a16..7ee29da5 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -13,6 +13,8 @@ use PublicInbox::MDA; use PublicInbox::Spawn qw(spawn); use PublicInbox::InboxWritable; use File::Temp qw//; +use PublicInbox::Filter::Base; +*REJECT = *PublicInbox::Filter::Base::REJECT; sub new { my ($class, $config) = @_; @@ -125,7 +127,7 @@ sub _remove_spam { $im->remove($mime, 'spam'); if (my $scrub = $ibx->filter) { my $scrubbed = $scrub->scrub($mime) or return; - $scrubbed == 100 and return; + $scrubbed == REJECT() and return; $im->remove($scrubbed, 'spam'); } }; @@ -138,13 +140,7 @@ sub _remove_spam { sub _try_path { my ($self, $path) = @_; - my @p = split(m!/+!, $path); - return if $p[-1] !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($p[-1] =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - return unless -f $path; + return unless PublicInbox::InboxWritable::is_maildir_path($path); if ($path !~ $self->{mdre}) { warn "unrecognized path: $path\n"; return; @@ -166,7 +162,7 @@ sub _try_path { } if (my $scrub = $inbox->filter) { my $ret = $scrub->scrub($mime) or return; - $ret == 100 and return; + $ret == REJECT() and return; $mime = $ret; } @@ -258,14 +254,14 @@ sub _path_to_mime { sub _importer_for { my ($self, $ibx) = @_; - my $im = $ibx->importer(0); my $importers = $self->{importers}; + my $im = $importers->{"$ibx"} ||= $ibx->importer(0); if (scalar(keys(%$importers)) > 2) { - delete $importers->{"$im"}; + delete $importers->{"$ibx"}; _done_for_now($self); } - $importers->{"$im"} = $im; + $importers->{"$ibx"} = $im; } sub _spamcheck_cb { |