From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C25D01FB0F for ; Tue, 9 Feb 2021 08:09:38 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 07/11] use MdirReader in -watch and InboxWritable Date: Tue, 9 Feb 2021 07:09:33 -0100 Message-Id: <20210209080937.4678-8-e@80x24.org> In-Reply-To: <20210209080937.4678-1-e@80x24.org> References: <20210209080937.4678-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: MdirReader now handles files in "$MAILDIR/new" properly and is stricter about what it accepts. eml_from_path is also made robust against FIFOs while eliminating TOCTOU races between stat(2) and open(2) calls. --- MANIFEST | 1 + lib/PublicInbox/InboxWritable.pm | 55 +++++++++++++------------------- lib/PublicInbox/MdirReader.pm | 22 +++++++++++-- lib/PublicInbox/Watch.pm | 6 ++-- t/mdir_reader.t | 22 +++++++++++++ 5 files changed, 69 insertions(+), 37 deletions(-) create mode 100644 t/mdir_reader.t diff --git a/MANIFEST b/MANIFEST index 6b3fc812..f8ee6998 100644 --- a/MANIFEST +++ b/MANIFEST @@ -376,6 +376,7 @@ t/mbox_reader.t t/mda-mime.eml t/mda.t t/mda_filter_rubylang.t +t/mdir_reader.t t/mid.t t/mime.t t/miscsearch.t diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 3a4012cd..c3acc4f9 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -10,6 +10,7 @@ use PublicInbox::Import; use PublicInbox::Filter::Base qw(REJECT); use Errno qw(ENOENT); our @EXPORT_OK = qw(eml_from_path); +use Fcntl qw(O_RDONLY O_NONBLOCK); use constant { PERM_UMASK => 0, @@ -118,25 +119,10 @@ sub filter { undef; } -sub is_maildir_basename ($) { - my ($bn) = @_; - 
return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($bn =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - 1; -} - -sub is_maildir_path ($) { - my ($path) = @_; - my @p = split(m!/+!, $path); - (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; -} - sub eml_from_path ($) { my ($path) = @_; - if (open my $fh, '<', $path) { + if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) { + return unless -f $fh; # no FIFOs or directories my $str = do { local $/; <$fh> } or return; PublicInbox::Eml->new(\$str); } else { # ENOENT is common with Maildir @@ -145,27 +131,30 @@ sub eml_from_path ($) { } } +sub _each_maildir_fn { + my ($fn, $im, $self) = @_; + if ($fn =~ /:2,([A-Za-z]*)\z/) { + my $fl = $1; + return if $fl =~ /[DT]/; # no Drafts or Trash for public + } + my $eml = eml_from_path($fn) or return; + if ($self && (my $filter = $self->filter($im))) { + my $ret = $filter->scrub($eml) or return; + return if $ret == REJECT(); + $eml = $ret; + } + $im->add($eml); +} + sub import_maildir { my ($self, $dir) = @_; - my $im = $self->importer(1); - foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } - foreach my $sub (qw(cur new)) { - opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; - while (defined(my $fn = readdir($dh))) { - next unless is_maildir_basename($fn); - my $mime = eml_from_path("$dir/$fn") or next; - - if (my $filter = $self->filter($im)) { - my $ret = $filter->scrub($mime) or return; - return if $ret == REJECT(); - $mime = $ret; - } - $im->add($mime); - } - } + my $im = $self->importer(1); + my @self = $self->filter($im) ? 
($self) : (); + PublicInbox::MdirReader::maildir_each_file(\&_each_maildir_fn, + $im, @self); $im->done; } diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index c6a0e7a8..e0ff676d 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -2,18 +2,36 @@ # License: AGPL-3.0+ # Maildirs for now, MH eventually +# ref: https://cr.yp.to/proto/maildir.html +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::MdirReader; use strict; use v5.10.1; +# returns Maildir flags from a basename ('' for no flags, undef for invalid) +sub maildir_basename_flags { + my (@f) = split(/:/, $_[0], -1); + return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.'); + $f[1] // return ''; # "new" + $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur" +} + +# same as above, but for full path name +sub maildir_path_flags { + my ($f) = @_; + my $i = rindex($f, '/'); + $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef; +} + sub maildir_each_file ($$;@) { my ($dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + while (defined(my $bn = readdir($dh))) { + maildir_basename_flags($bn) // next; + $cb->($pfx.$bn, @arg); } } } diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1835fa0e..a4302162 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -2,12 +2,13 @@ # License: AGPL-3.0+ # # ref: https://cr.yp.to/proto/maildir.html -# http://wiki2.dovecot.org/MailboxFormat/Maildir +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::Watch; use strict; use v5.10.1; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::MdirReader; use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; use PublicInbox::Sigfd; @@ -207,7 +208,8 @@ sub import_eml 
($$$) { sub _try_path { my ($self, $path) = @_; - return unless PublicInbox::InboxWritable::is_maildir_path($path); + my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return; + return if $fl =~ /[DT]/; # no Drafts or Trash if ($path !~ $self->{mdre}) { warn "unrecognized path: $path\n"; return; diff --git a/t/mdir_reader.t b/t/mdir_reader.t new file mode 100644 index 00000000..51b38af4 --- /dev/null +++ b/t/mdir_reader.t @@ -0,0 +1,22 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use PublicInbox::TestCommon; +require_ok 'PublicInbox::MdirReader'; +*maildir_basename_flags = \&PublicInbox::MdirReader::maildir_basename_flags; +*maildir_path_flags = \&PublicInbox::MdirReader::maildir_path_flags; + +is(maildir_basename_flags('foo'), '', 'new valid name accepted'); +is(maildir_basename_flags('foo:2,'), '', 'cur valid name accepted'); +is(maildir_basename_flags('foo:2,bar'), 'bar', 'flags name accepted'); +is(maildir_basename_flags('.foo:2,bar'), undef, 'no hidden files'); +is(maildir_basename_flags('fo:o:2,bar'), undef, 'no extra colon'); +is(maildir_path_flags('/path/to/foo:2,S'), 'S', 'flag returned for path'); +is(maildir_path_flags('/path/to/.foo:2,S'), undef, 'no hidden paths'); +is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path'); + +# not sure if there's a better place for eml_from_path +use_ok 'PublicInbox::InboxWritable', qw(eml_from_path); +is(eml_from_path('.'), undef, 'eml_from_path fails on directory'); + +done_testing;