From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 07/11] use MdirReader in -watch and InboxWritable
Date: Tue, 9 Feb 2021 07:09:33 -0100 [thread overview]
Message-ID: <20210209080937.4678-8-e@80x24.org> (raw)
In-Reply-To: <20210209080937.4678-1-e@80x24.org>
MdirReader now handles files in "$MAILDIR/new" properly and
is stricter about what it accepts. eml_from_path is also
made robust against FIFOs while eliminating TOCTOU races
between stat(2) and open(2) calls.
---
MANIFEST | 1 +
lib/PublicInbox/InboxWritable.pm | 55 +++++++++++++-------------------
lib/PublicInbox/MdirReader.pm | 22 +++++++++++--
lib/PublicInbox/Watch.pm | 6 ++--
t/mdir_reader.t | 22 +++++++++++++
5 files changed, 69 insertions(+), 37 deletions(-)
create mode 100644 t/mdir_reader.t
diff --git a/MANIFEST b/MANIFEST
index 6b3fc812..f8ee6998 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -376,6 +376,7 @@ t/mbox_reader.t
t/mda-mime.eml
t/mda.t
t/mda_filter_rubylang.t
+t/mdir_reader.t
t/mid.t
t/mime.t
t/miscsearch.t
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 3a4012cd..c3acc4f9 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -10,6 +10,7 @@ use PublicInbox::Import;
use PublicInbox::Filter::Base qw(REJECT);
use Errno qw(ENOENT);
our @EXPORT_OK = qw(eml_from_path);
+use Fcntl qw(O_RDONLY O_NONBLOCK);
use constant {
PERM_UMASK => 0,
@@ -118,25 +119,10 @@ sub filter {
undef;
}
-sub is_maildir_basename ($) {
- my ($bn) = @_;
- return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
- if ($bn =~ /:2,([A-Z]+)\z/i) {
- my $flags = $1;
- return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
- }
- 1;
-}
-
-sub is_maildir_path ($) {
- my ($path) = @_;
- my @p = split(m!/+!, $path);
- (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
-}
-
sub eml_from_path ($) {
my ($path) = @_;
- if (open my $fh, '<', $path) {
+ if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) {
+ return unless -f $fh; # no FIFOs or directories
my $str = do { local $/; <$fh> } or return;
PublicInbox::Eml->new(\$str);
} else { # ENOENT is common with Maildir
@@ -145,27 +131,30 @@ sub eml_from_path ($) {
}
}
+sub _each_maildir_fn {
+ my ($fn, $im, $self) = @_;
+ if ($fn =~ /:2,([A-Za-z]*)\z/) {
+ my $fl = $1;
+ return if $fl =~ /[DT]/; # no Drafts or Trash for public
+ }
+ my $eml = eml_from_path($fn) or return;
+ if ($self && (my $filter = $self->filter($im))) {
+ my $ret = $filter->scrub($eml) or return;
+ return if $ret == REJECT();
+ $eml = $ret;
+ }
+ $im->add($eml);
+}
+
sub import_maildir {
my ($self, $dir) = @_;
- my $im = $self->importer(1);
-
foreach my $sub (qw(cur new tmp)) {
-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
}
- foreach my $sub (qw(cur new)) {
- opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
- while (defined(my $fn = readdir($dh))) {
- next unless is_maildir_basename($fn);
- my $mime = eml_from_path("$dir/$fn") or next;
-
- if (my $filter = $self->filter($im)) {
- my $ret = $filter->scrub($mime) or return;
- return if $ret == REJECT();
- $mime = $ret;
- }
- $im->add($mime);
- }
- }
+ my $im = $self->importer(1);
+ my @self = $self->filter($im) ? ($self) : ();
+ PublicInbox::MdirReader::maildir_each_file($dir, \&_each_maildir_fn,
+ $im, @self);
$im->done;
}
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index c6a0e7a8..e0ff676d 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -2,18 +2,36 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Maildirs for now, MH eventually
+# ref: https://cr.yp.to/proto/maildir.html
+# https://wiki2.dovecot.org/MailboxFormat/Maildir
package PublicInbox::MdirReader;
use strict;
use v5.10.1;
+# returns Maildir flags from a basename ('' for no flags, undef for invalid)
+sub maildir_basename_flags {
+ my (@f) = split(/:/, $_[0], -1);
+ return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
+ $f[1] // return ''; # "new"
+ $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
+}
+
+# same as above, but for full path name
+sub maildir_path_flags {
+ my ($f) = @_;
+ my $i = rindex($f, '/');
+ $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
+}
+
sub maildir_each_file ($$;@) {
my ($dir, $cb, @arg) = @_;
$dir .= '/' unless substr($dir, -1) eq '/';
for my $d (qw(new/ cur/)) {
my $pfx = $dir.$d;
opendir my $dh, $pfx or next;
- while (defined(my $fn = readdir($dh))) {
- $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
+ while (defined(my $bn = readdir($dh))) {
+ maildir_basename_flags($bn) // next;
+ $cb->($pfx.$bn, @arg);
}
}
}
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 1835fa0e..a4302162 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -2,12 +2,13 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# ref: https://cr.yp.to/proto/maildir.html
-# http://wiki2.dovecot.org/MailboxFormat/Maildir
+# https://wiki2.dovecot.org/MailboxFormat/Maildir
package PublicInbox::Watch;
use strict;
use v5.10.1;
use PublicInbox::Eml;
use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::MdirReader;
use PublicInbox::Filter::Base qw(REJECT);
use PublicInbox::Spamcheck;
use PublicInbox::Sigfd;
@@ -207,7 +208,8 @@ sub import_eml ($$$) {
sub _try_path {
my ($self, $path) = @_;
- return unless PublicInbox::InboxWritable::is_maildir_path($path);
+ my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return;
+ return if $fl =~ /[DT]/; # no Drafts or Trash
if ($path !~ $self->{mdre}) {
warn "unrecognized path: $path\n";
return;
diff --git a/t/mdir_reader.t b/t/mdir_reader.t
new file mode 100644
index 00000000..51b38af4
--- /dev/null
+++ b/t/mdir_reader.t
@@ -0,0 +1,22 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use PublicInbox::TestCommon;
+require_ok 'PublicInbox::MdirReader';
+*maildir_basename_flags = \&PublicInbox::MdirReader::maildir_basename_flags;
+*maildir_path_flags = \&PublicInbox::MdirReader::maildir_path_flags;
+
+is(maildir_basename_flags('foo'), '', 'new valid name accepted');
+is(maildir_basename_flags('foo:2,'), '', 'cur valid name accepted');
+is(maildir_basename_flags('foo:2,bar'), 'bar', 'flags name accepted');
+is(maildir_basename_flags('.foo:2,bar'), undef, 'no hidden files');
+is(maildir_basename_flags('fo:o:2,bar'), undef, 'no extra colon');
+is(maildir_path_flags('/path/to/foo:2,S'), 'S', 'flag returned for path');
+is(maildir_path_flags('/path/to/.foo:2,S'), undef, 'no hidden paths');
+is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path');
+
+# not sure if there's a better place for eml_from_path
+use_ok 'PublicInbox::InboxWritable', qw(eml_from_path);
+is(eml_from_path('.'), undef, 'eml_from_path fails on directory');
+
+done_testing;
next prev parent reply other threads:[~2021-02-09 8:09 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-09 8:09 [PATCH 00/11] Maildir code consolidation, test updates Eric Wong
2021-02-09 8:09 ` [PATCH 01/11] t/thread-index-gap.t: avoid unnecessary map Eric Wong
2021-02-09 8:09 ` [PATCH 02/11] test_common: disable fsync on the CLI where possible Eric Wong
2021-02-09 8:09 ` [PATCH 03/11] t/cgi.t: modernizations and style updates Eric Wong
2021-02-09 8:09 ` [PATCH 04/11] git: ->qx: respect caller's $/ in array context Eric Wong
2021-02-09 8:09 ` [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier Eric Wong
2021-02-09 8:09 ` [PATCH 06/11] t/run.perl: fix for >128 tests Eric Wong
2021-02-09 8:09 ` Eric Wong [this message]
2021-02-09 8:09 ` [PATCH 08/11] lei q: prefix --alert ops with ':' instead of '-' Eric Wong
2021-02-09 8:09 ` [PATCH 09/11] t/run.perl: drop Cwd dependency Eric Wong
2021-02-09 8:09 ` [PATCH 10/11] lei: replace "I:"-prefixed info messages with "#" Eric Wong
2021-02-09 8:09 ` [PATCH 11/11] tests|lei: fixes for TEST_RUN_MODE=0 and lei oneshot Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210209080937.4678-8-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).