From fa3f0cbcd1af5008e56c77e3c46ab60b5eca3a13 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 9 Feb 2021 07:09:33 -0100 Subject: use MdirReader in -watch and InboxWritable MdirReader now handles files in "$MAILDIR/new" properly and is stricter about what it accepts. eml_from_path is also made robust against FIFOs while eliminating TOCTOU races between stat(2) and open(2) calls. --- lib/PublicInbox/InboxWritable.pm | 55 ++++++++++++++++------------------------ lib/PublicInbox/MdirReader.pm | 22 ++++++++++++++-- lib/PublicInbox/Watch.pm | 6 +++-- 3 files changed, 46 insertions(+), 37 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 3a4012cd..c3acc4f9 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -10,6 +10,7 @@ use PublicInbox::Import; use PublicInbox::Filter::Base qw(REJECT); use Errno qw(ENOENT); our @EXPORT_OK = qw(eml_from_path); +use Fcntl qw(O_RDONLY O_NONBLOCK); use constant { PERM_UMASK => 0, @@ -118,25 +119,10 @@ sub filter { undef; } -sub is_maildir_basename ($) { - my ($bn) = @_; - return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($bn =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - 1; -} - -sub is_maildir_path ($) { - my ($path) = @_; - my @p = split(m!/+!, $path); - (is_maildir_basename($p[-1]) && -f $path) ? 
1 : 0; -} - sub eml_from_path ($) { my ($path) = @_; - if (open my $fh, '<', $path) { + if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) { + return unless -f $fh; # no FIFOs or directories my $str = do { local $/; <$fh> } or return; PublicInbox::Eml->new(\$str); } else { # ENOENT is common with Maildir @@ -145,27 +131,30 @@ sub eml_from_path ($) { } } +sub _each_maildir_fn { + my ($fn, $im, $self) = @_; + if ($fn =~ /:2,([A-Za-z]*)\z/) { + my $fl = $1; + return if $fl =~ /[DT]/; # no Drafts or Trash for public + } + my $eml = eml_from_path($fn) or return; + if ($self && (my $filter = $self->filter($im))) { + my $ret = $filter->scrub($eml) or return; + return if $ret == REJECT(); + $eml = $ret; + } + $im->add($eml); +} + sub import_maildir { my ($self, $dir) = @_; - my $im = $self->importer(1); - foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } - foreach my $sub (qw(cur new)) { - opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; - while (defined(my $fn = readdir($dh))) { - next unless is_maildir_basename($fn); - my $mime = eml_from_path("$dir/$fn") or next; - - if (my $filter = $self->filter($im)) { - my $ret = $filter->scrub($mime) or return; - return if $ret == REJECT(); - $mime = $ret; - } - $im->add($mime); - } - } + my $im = $self->importer(1); + my @self = $self->filter($im) ? 
($self) : (); + PublicInbox::MdirReader::maildir_each_file($dir, \&_each_maildir_fn, + $im, @self); $im->done; } diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index c6a0e7a8..e0ff676d 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -2,18 +2,36 @@ # License: AGPL-3.0+ # Maildirs for now, MH eventually +# ref: https://cr.yp.to/proto/maildir.html +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::MdirReader; use strict; use v5.10.1; +# returns Maildir flags from a basename ('' for no flags, undef for invalid) +sub maildir_basename_flags { + my (@f) = split(/:/, $_[0], -1); + return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.'); + $f[1] // return ''; # "new" + $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur" +} + +# same as above, but for full path name +sub maildir_path_flags { + my ($f) = @_; + my $i = rindex($f, '/'); + $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef; +} + sub maildir_each_file ($$;@) { my ($dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + while (defined(my $bn = readdir($dh))) { + maildir_basename_flags($bn) // next; + $cb->($pfx.$bn, @arg); } } } diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1835fa0e..a4302162 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -2,12 +2,13 @@ # License: AGPL-3.0+ # # ref: https://cr.yp.to/proto/maildir.html -# http://wiki2.dovecot.org/MailboxFormat/Maildir +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::Watch; use strict; use v5.10.1; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::MdirReader; use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; use PublicInbox::Sigfd; @@ -207,7 +208,8 @@ sub import_eml 
($$$) { sub _try_path { my ($self, $path) = @_; - return unless PublicInbox::InboxWritable::is_maildir_path($path); + my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return; + return if $fl =~ /[DT]/; # no Drafts or Trash if ($path !~ $self->{mdre}) { warn "unrecognized path: $path\n"; return; -- cgit v1.2.3-24-ge0c7