about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-02-09 07:09:33 -0100
committerEric Wong <e@80x24.org>2021-02-10 06:59:07 +0000
commitfa3f0cbcd1af5008e56c77e3c46ab60b5eca3a13 (patch)
tree318cbf3b843d0a33829ecf1fbb6c8becbe0b719e /lib/PublicInbox
parentcf3d02714d560cfeab1c5582ad2e5a11542cd649 (diff)
downloadpublic-inbox-fa3f0cbcd1af5008e56c77e3c46ab60b5eca3a13.tar.gz
MdirReader now handles files in "$MAILDIR/new" properly and
is stricter about what it accepts.  eml_from_path is also
made robust against FIFOs while eliminating TOCTOU races
between stat(2) and open(2) calls.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/InboxWritable.pm55
-rw-r--r--lib/PublicInbox/MdirReader.pm22
-rw-r--r--lib/PublicInbox/Watch.pm6
3 files changed, 46 insertions, 37 deletions
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 3a4012cd..c3acc4f9 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -10,6 +10,7 @@ use PublicInbox::Import;
 use PublicInbox::Filter::Base qw(REJECT);
 use Errno qw(ENOENT);
 our @EXPORT_OK = qw(eml_from_path);
+use Fcntl qw(O_RDONLY O_NONBLOCK);
 
 use constant {
         PERM_UMASK => 0,
@@ -118,25 +119,10 @@ sub filter {
         undef;
 }
 
-sub is_maildir_basename ($) {
-        my ($bn) = @_;
-        return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
-        if ($bn =~ /:2,([A-Z]+)\z/i) {
-                my $flags = $1;
-                return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
-        }
-        1;
-}
-
-sub is_maildir_path ($) {
-        my ($path) = @_;
-        my @p = split(m!/+!, $path);
-        (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
-}
-
 sub eml_from_path ($) {
         my ($path) = @_;
-        if (open my $fh, '<', $path) {
+        if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) {
+                return unless -f $fh; # no FIFOs or directories
                 my $str = do { local $/; <$fh> } or return;
                 PublicInbox::Eml->new(\$str);
         } else { # ENOENT is common with Maildir
@@ -145,27 +131,30 @@ sub eml_from_path ($) {
         }
 }
 
+sub _each_maildir_fn {
+        my ($fn, $im, $self) = @_;
+        if ($fn =~ /:2,([A-Za-z]*)\z/) {
+                my $fl = $1;
+                return if $fl =~ /[DT]/; # no Drafts or Trash for public
+        }
+        my $eml = eml_from_path($fn) or return;
+        if ($self && (my $filter = $self->filter($im))) {
+                my $ret = $filter->scrub($eml) or return;
+                return if $ret == REJECT();
+                $eml = $ret;
+        }
+        $im->add($eml);
+}
+
 sub import_maildir {
         my ($self, $dir) = @_;
-        my $im = $self->importer(1);
-
         foreach my $sub (qw(cur new tmp)) {
                 -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
         }
-        foreach my $sub (qw(cur new)) {
-                opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
-                while (defined(my $fn = readdir($dh))) {
-                        next unless is_maildir_basename($fn);
-                        my $mime = eml_from_path("$dir/$fn") or next;
-
-                        if (my $filter = $self->filter($im)) {
-                                my $ret = $filter->scrub($mime) or return;
-                                return if $ret == REJECT();
-                                $mime = $ret;
-                        }
-                        $im->add($mime);
-                }
-        }
+        my $im = $self->importer(1);
+        my @self = $self->filter($im) ? ($self) : ();
+        PublicInbox::MdirReader::maildir_each_file(\&_each_maildir_fn,
+                                                $im, @self);
         $im->done;
 }
 
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index c6a0e7a8..e0ff676d 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -2,18 +2,36 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Maildirs for now, MH eventually
+# ref: https://cr.yp.to/proto/maildir.html
+#        https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::MdirReader;
 use strict;
 use v5.10.1;
 
+# returns Maildir flags from a basename ('' for no flags, undef for invalid)
+sub maildir_basename_flags {
+        my (@f) = split(/:/, $_[0], -1);
+        return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
+        $f[1] // return ''; # "new"
+        $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
+}
+
+# same as above, but for full path name
+sub maildir_path_flags {
+        my ($f) = @_;
+        my $i = rindex($f, '/');
+        $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
+}
+
 sub maildir_each_file ($$;@) {
         my ($dir, $cb, @arg) = @_;
         $dir .= '/' unless substr($dir, -1) eq '/';
         for my $d (qw(new/ cur/)) {
                 my $pfx = $dir.$d;
                 opendir my $dh, $pfx or next;
-                while (defined(my $fn = readdir($dh))) {
-                        $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
+                while (defined(my $bn = readdir($dh))) {
+                        maildir_basename_flags($bn) // next;
+                        $cb->($pfx.$bn, @arg);
                 }
         }
 }
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 1835fa0e..a4302162 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -2,12 +2,13 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # ref: https://cr.yp.to/proto/maildir.html
-#        http://wiki2.dovecot.org/MailboxFormat/Maildir
+#        https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::Watch;
 use strict;
 use v5.10.1;
 use PublicInbox::Eml;
 use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::MdirReader;
 use PublicInbox::Filter::Base qw(REJECT);
 use PublicInbox::Spamcheck;
 use PublicInbox::Sigfd;
@@ -207,7 +208,8 @@ sub import_eml ($$$) {
 
 sub _try_path {
         my ($self, $path) = @_;
-        return unless PublicInbox::InboxWritable::is_maildir_path($path);
+        my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return;
+        return if $fl =~ /[DT]/; # no Drafts or Trash
         if ($path !~ $self->{mdre}) {
                 warn "unrecognized path: $path\n";
                 return;