user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 07/11] use MdirReader in -watch and InboxWritable
Date: Tue,  9 Feb 2021 07:09:33 -0100	[thread overview]
Message-ID: <20210209080937.4678-8-e@80x24.org> (raw)
In-Reply-To: <20210209080937.4678-1-e@80x24.org>

MdirReader now handles files in "$MAILDIR/new" properly and
is stricter about what it accepts.  eml_from_path is also
made robust against FIFOs while eliminating TOCTOU races
between stat(2) and open(2) calls.
---
 MANIFEST                         |  1 +
 lib/PublicInbox/InboxWritable.pm | 55 +++++++++++++-------------------
 lib/PublicInbox/MdirReader.pm    | 22 +++++++++++--
 lib/PublicInbox/Watch.pm         |  6 ++--
 t/mdir_reader.t                  | 22 +++++++++++++
 5 files changed, 69 insertions(+), 37 deletions(-)
 create mode 100644 t/mdir_reader.t

diff --git a/MANIFEST b/MANIFEST
index 6b3fc812..f8ee6998 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -376,6 +376,7 @@ t/mbox_reader.t
 t/mda-mime.eml
 t/mda.t
 t/mda_filter_rubylang.t
+t/mdir_reader.t
 t/mid.t
 t/mime.t
 t/miscsearch.t
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 3a4012cd..c3acc4f9 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -10,6 +10,7 @@ use PublicInbox::Import;
 use PublicInbox::Filter::Base qw(REJECT);
 use Errno qw(ENOENT);
 our @EXPORT_OK = qw(eml_from_path);
+use Fcntl qw(O_RDONLY O_NONBLOCK);
 
 use constant {
 	PERM_UMASK => 0,
@@ -118,25 +119,10 @@ sub filter {
 	undef;
 }
 
-sub is_maildir_basename ($) {
-	my ($bn) = @_;
-	return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
-	if ($bn =~ /:2,([A-Z]+)\z/i) {
-		my $flags = $1;
-		return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
-	}
-	1;
-}
-
-sub is_maildir_path ($) {
-	my ($path) = @_;
-	my @p = split(m!/+!, $path);
-	(is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
-}
-
 sub eml_from_path ($) {
 	my ($path) = @_;
-	if (open my $fh, '<', $path) {
+	if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) {
+		return unless -f $fh; # no FIFOs or directories
 		my $str = do { local $/; <$fh> } or return;
 		PublicInbox::Eml->new(\$str);
 	} else { # ENOENT is common with Maildir
@@ -145,27 +131,30 @@ sub eml_from_path ($) {
 	}
 }
 
+sub _each_maildir_fn {
+	my ($fn, $im, $self) = @_;
+	if ($fn =~ /:2,([A-Za-z]*)\z/) {
+		my $fl = $1;
+		return if $fl =~ /[DT]/; # no Drafts or Trash for public
+	}
+	my $eml = eml_from_path($fn) or return;
+	if ($self && (my $filter = $self->filter($im))) {
+		my $ret = $filter->scrub($eml) or return;
+		return if $ret == REJECT();
+		$eml = $ret;
+	}
+	$im->add($eml);
+}
+
 sub import_maildir {
 	my ($self, $dir) = @_;
-	my $im = $self->importer(1);
-
 	foreach my $sub (qw(cur new tmp)) {
 		-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
 	}
-	foreach my $sub (qw(cur new)) {
-		opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
-		while (defined(my $fn = readdir($dh))) {
-			next unless is_maildir_basename($fn);
-			my $mime = eml_from_path("$dir/$fn") or next;
-
-			if (my $filter = $self->filter($im)) {
-				my $ret = $filter->scrub($mime) or return;
-				return if $ret == REJECT();
-				$mime = $ret;
-			}
-			$im->add($mime);
-		}
-	}
+	my $im = $self->importer(1);
+	my @self = $self->filter($im) ? ($self) : ();
+	PublicInbox::MdirReader::maildir_each_file($dir, \&_each_maildir_fn,
+						$im, @self);
 	$im->done;
 }
 
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index c6a0e7a8..e0ff676d 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -2,18 +2,36 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Maildirs for now, MH eventually
+# ref: https://cr.yp.to/proto/maildir.html
+#	https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::MdirReader;
 use strict;
 use v5.10.1;
 
+# returns Maildir flags from a basename ('' for no flags, undef for invalid)
+sub maildir_basename_flags {
+	my (@f) = split(/:/, $_[0], -1);
+	return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
+	$f[1] // return ''; # "new"
+	$f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
+}
+
+# same as above, but for full path name
+sub maildir_path_flags {
+	my ($f) = @_;
+	my $i = rindex($f, '/');
+	$i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
+}
+
 sub maildir_each_file ($$;@) {
 	my ($dir, $cb, @arg) = @_;
 	$dir .= '/' unless substr($dir, -1) eq '/';
 	for my $d (qw(new/ cur/)) {
 		my $pfx = $dir.$d;
 		opendir my $dh, $pfx or next;
-		while (defined(my $fn = readdir($dh))) {
-			$cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
+		while (defined(my $bn = readdir($dh))) {
+			maildir_basename_flags($bn) // next;
+			$cb->($pfx.$bn, @arg);
 		}
 	}
 }
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 1835fa0e..a4302162 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -2,12 +2,13 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # ref: https://cr.yp.to/proto/maildir.html
-#	http://wiki2.dovecot.org/MailboxFormat/Maildir
+#	https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::Watch;
 use strict;
 use v5.10.1;
 use PublicInbox::Eml;
 use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::MdirReader;
 use PublicInbox::Filter::Base qw(REJECT);
 use PublicInbox::Spamcheck;
 use PublicInbox::Sigfd;
@@ -207,7 +208,8 @@ sub import_eml ($$$) {
 
 sub _try_path {
 	my ($self, $path) = @_;
-	return unless PublicInbox::InboxWritable::is_maildir_path($path);
+	my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return;
+	return if $fl =~ /[DT]/; # no Drafts or Trash
 	if ($path !~ $self->{mdre}) {
 		warn "unrecognized path: $path\n";
 		return;
diff --git a/t/mdir_reader.t b/t/mdir_reader.t
new file mode 100644
index 00000000..51b38af4
--- /dev/null
+++ b/t/mdir_reader.t
@@ -0,0 +1,22 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use PublicInbox::TestCommon;
+require_ok 'PublicInbox::MdirReader';
+*maildir_basename_flags = \&PublicInbox::MdirReader::maildir_basename_flags;
+*maildir_path_flags = \&PublicInbox::MdirReader::maildir_path_flags;
+
+is(maildir_basename_flags('foo'), '', 'new valid name accepted');
+is(maildir_basename_flags('foo:2,'), '', 'cur valid name accepted');
+is(maildir_basename_flags('foo:2,bar'), 'bar', 'flags name accepted');
+is(maildir_basename_flags('.foo:2,bar'), undef, 'no hidden files');
+is(maildir_basename_flags('fo:o:2,bar'), undef, 'no extra colon');
+is(maildir_path_flags('/path/to/foo:2,S'), 'S', 'flag returned for path');
+is(maildir_path_flags('/path/to/.foo:2,S'), undef, 'no hidden paths');
+is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path');
+
+# not sure if there's a better place for eml_from_path
+use_ok 'PublicInbox::InboxWritable', qw(eml_from_path);
+is(eml_from_path('.'), undef, 'eml_from_path fails on directory');
+
+done_testing;

  parent reply	other threads:[~2021-02-09  8:09 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-09  8:09 [PATCH 00/11] Maildir code consolidation, test updates Eric Wong
2021-02-09  8:09 ` [PATCH 01/11] t/thread-index-gap.t: avoid unnecessary map Eric Wong
2021-02-09  8:09 ` [PATCH 02/11] test_common: disable fsync on the CLI where possible Eric Wong
2021-02-09  8:09 ` [PATCH 03/11] t/cgi.t: modernizations and style updates Eric Wong
2021-02-09  8:09 ` [PATCH 04/11] git: ->qx: respect caller's $/ in array context Eric Wong
2021-02-09  8:09 ` [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier Eric Wong
2021-02-09  8:09 ` [PATCH 06/11] t/run.perl: fix for >128 tests Eric Wong
2021-02-09  8:09 ` Eric Wong [this message]
2021-02-09  8:09 ` [PATCH 08/11] lei q: prefix --alert ops with ':' instead of '-' Eric Wong
2021-02-09  8:09 ` [PATCH 09/11] t/run.perl: drop Cwd dependency Eric Wong
2021-02-09  8:09 ` [PATCH 10/11] lei: replace "I:"-prefixed info messages with "#" Eric Wong
2021-02-09  8:09 ` [PATCH 11/11] tests|lei: fixes for TEST_RUN_MODE=0 and lei oneshot Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210209080937.4678-8-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).