From 54590383027a67d11953690cbb6390347757730b Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)"
Date: Tue, 20 Mar 2018 21:00:00 +0000
Subject: InboxWritable: add mbox/maildir parsing + import logic

This will make it easier to import mail from mbox files and
Maildirs, as well as supporting future Filter API users.  It
allows simplifying our ad-hoc import_vger_from_mbox script.
---
 lib/PublicInbox/InboxWritable.pm | 127 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)

(limited to 'lib/PublicInbox/InboxWritable.pm')

diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 0a976ea2..82834f08 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -7,6 +7,8 @@ use strict;
 use warnings;
 use base qw(PublicInbox::Inbox);
 use PublicInbox::Import;
+use PublicInbox::Filter::Base;
+*REJECT = *PublicInbox::Filter::Base::REJECT;
 
 sub new {
 	my ($class, $ibx) = @_;
@@ -54,4 +56,129 @@ sub filter {
 	undef;
 }
 
+# Returns 1 if $bn looks like the basename of an importable Maildir
+# message, 0 otherwise.  Names carrying the [D]raft or [T]rashed
+# flags in their ":2," info suffix are refused.
+sub is_maildir_basename ($) {
+	my ($bn) = @_;
+	return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
+	if ($bn =~ /:2,([A-Z]+)\z/i) {
+		my $flags = $1;
+		return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
+	}
+	1;
+}
+
+# Returns 1 if the last component of $path is an acceptable Maildir
+# basename and $path is an existing regular file, 0 otherwise.
+sub is_maildir_path ($) {
+	my ($path) = @_;
+	my @p = split(m!/+!, $path);
+	(is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
+}
+
+# Slurps $path and returns it as a PublicInbox::MIME object.
+# Returns nothing if the file is missing or its content is false
+# (e.g. empty); other open failures are warned about and skipped.
+sub maildir_path_load ($) {
+	my ($path) = @_;
+	if (open my $fh, '<', $path) {
+		local $/;
+		my $str = <$fh>;
+		$str or return;
+		return PublicInbox::MIME->new(\$str);
+	} elsif ($!{ENOENT}) {
+		# common with Maildir
+		return;
+	} else {
+		warn "failed to open $path: $!\n";
+		return;
+	}
+}
+
+# Imports every acceptable message found in the "cur" and "new"
+# subdirectories of the Maildir at $dir.  Dies if $dir lacks any of
+# cur/new/tmp or if a subdirectory cannot be opened.
+sub import_maildir {
+	my ($self, $dir) = @_;
+	my $im = $self->importer(1);
+	my $filter = $self->filter;
+	foreach my $sub (qw(cur new tmp)) {
+		-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
+	}
+	foreach my $sub (qw(cur new)) {
+		opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
+		while (defined(my $fn = readdir($dh))) {
+			next unless is_maildir_basename($fn);
+			# readdir() only yields basenames, so the
+			# subdirectory has to be part of the path
+			my $mime = maildir_path_load("$dir/$sub/$fn") or next;
+			if ($filter) {
+				# a rejected message is skipped, it must not
+				# abort the import of the remaining mail
+				my $ret = $filter->scrub($mime) or next;
+				next if $ret == REJECT();
+				$mime = $ret;
+			}
+			$im->add($mime);
+		}
+	}
+	$im->done;
+}
+
+# asctime: From example@example.com Fri Jun 23 02:56:55 2000
+my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/;
+
+# Adds the single message in the buffer referenced by $msg to the
+# importer $im after undoing the "From " escaping of $variant.
+# Returns early (without adding) if $filter drops or rejects it.
+sub mb_add ($$$$) {
+	my ($im, $variant, $filter, $msg) = @_;
+	$$msg =~ s/(\r?\n)+\z/$1/s;
+	# unescape before parsing: the parser is handed a reference
+	# to this buffer, later edits would not reach the message
+	if ($variant eq 'mboxrd') {
+		$$msg =~ s/^>(>*From )/$1/gm;
+	} elsif ($variant eq 'mboxo') {
+		$$msg =~ s/^>From /From /gm;
+	}
+	my $mime = PublicInbox::MIME->new($msg);
+	if ($filter) {
+		my $ret = $filter->scrub($mime) or return;
+		return if $ret == REJECT();
+		$mime = $ret;
+	}
+	$im->add($mime)
+}
+
+# Reads an mbox of the given $variant ('mboxrd' or 'mboxo') from the
+# filehandle $fh and imports each message.  Dies on any other variant.
+sub import_mbox {
+	my ($self, $fh, $variant) = @_;
+	if ($variant !~ /\A(?:mboxrd|mboxo)\z/) {
+		die "variant must be 'mboxrd' or 'mboxo'\n";
+	}
+	my $im = $self->importer(1);
+	my $prev = undef;
+	my $msg = '';
+	my $filter = $self->filter;
+	while (defined(my $l = <$fh>)) {
+		if ($l =~ /$from_strict/o) {
+			# a "From " line only separates messages at the
+			# start of input or after a blank line
+			if (!defined($prev) || $prev =~ /^\r?$/) {
+				mb_add($im, $variant, $filter, \$msg) if $msg;
+				$msg = '';
+				$prev = $l;
+				next;
+			}
+			warn "W[$.] $l\n";
+		}
+		$prev = $l;
+		$msg .= $l;
+	}
+	mb_add($im, $variant, $filter, \$msg) if $msg;
+	$im->done;
+}
+
 1;
-- 
cgit v1.2.3-24-ge0c7