From 1138a7a940ec40ad4394aa55413456a6f6fd341e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 12 Feb 2021 00:05:51 -0700 Subject: import_mbox: use MboxReader It supports more mbox variants and its trailing newline behavior is probably more correct despite the previous change to PublicInbox::Filter::Vger. --- lib/PublicInbox/InboxWritable.pm | 44 ++++++++-------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) (limited to 'lib/PublicInbox/InboxWritable.pm') diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index c3acc4f9..d4a9040f 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -158,49 +158,23 @@ sub import_maildir { $im->done; } -# asctime: From example@example.com Fri Jun 23 02:56:55 2000 -my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/; - -sub mb_add ($$$$) { - my ($im, $variant, $filter, $msg) = @_; - $$msg =~ s/(\r?\n)+\z/$1/s; - if ($variant eq 'mboxrd') { - $$msg =~ s/^>(>*From )/$1/gms; - } elsif ($variant eq 'mboxo') { - $$msg =~ s/^>From /From /gms; - } - my $mime = PublicInbox::Eml->new($msg); +sub _mbox_eml_cb { # MboxReader->mbox* callback + my ($eml, $im, $filter) = @_; if ($filter) { - my $ret = $filter->scrub($mime) or return; + my $ret = $filter->scrub($eml) or return; return if $ret == REJECT(); - $mime = $ret; + $eml = $ret; } - $im->add($mime) + $im->add($eml); } sub import_mbox { my ($self, $fh, $variant) = @_; - if ($variant !~ /\A(?:mboxrd|mboxo)\z/) { - die "variant must be 'mboxrd' or 'mboxo'\n"; - } + require PublicInbox::MboxReader; + my $cb = PublicInbox::MboxReader->can($variant) or + die "$variant not supported\n"; my $im = $self->importer(1); - my $prev = undef; - my $msg = ''; - my $filter = $self->filter; - while (defined(my $l = <$fh>)) { - if ($l =~ /$from_strict/o) { - if (!defined($prev) || $prev =~ /^\r?$/) { - mb_add($im, $variant, $filter, \$msg) if $msg; - $msg = ''; - $prev = $l; - next; - } - warn 
"W[$.] $l\n"; - } - $prev = $l; - $msg .= $l; - } - mb_add($im, $variant, $filter, \$msg) if $msg; + $cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter); $im->done; } -- cgit v1.2.3-24-ge0c7