From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 07/11] use MdirReader in -watch and InboxWritable Date: Tue, 9 Feb 2021 07:09:33 -0100 Message-ID: <20210209080937.4678-8-e@80x24.org> (raw) In-Reply-To: <20210209080937.4678-1-e@80x24.org> MdirReader now handles files in "$MAILDIR/new" properly and is stricter about what it accepts. eml_from_path is also made robust against FIFOs while eliminating TOCTOU races between stat(2) and open(2) calls. --- MANIFEST | 1 + lib/PublicInbox/InboxWritable.pm | 55 +++++++++++++------------------- lib/PublicInbox/MdirReader.pm | 22 +++++++++++-- lib/PublicInbox/Watch.pm | 6 ++-- t/mdir_reader.t | 22 +++++++++++++ 5 files changed, 69 insertions(+), 37 deletions(-) create mode 100644 t/mdir_reader.t diff --git a/MANIFEST b/MANIFEST index 6b3fc812..f8ee6998 100644 --- a/MANIFEST +++ b/MANIFEST @@ -376,6 +376,7 @@ t/mbox_reader.t t/mda-mime.eml t/mda.t t/mda_filter_rubylang.t +t/mdir_reader.t t/mid.t t/mime.t t/miscsearch.t diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 3a4012cd..c3acc4f9 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -10,6 +10,7 @@ use PublicInbox::Import; use PublicInbox::Filter::Base qw(REJECT); use Errno qw(ENOENT); our @EXPORT_OK = qw(eml_from_path); +use Fcntl qw(O_RDONLY O_NONBLOCK); use constant { PERM_UMASK => 0, @@ -118,25 +119,10 @@ sub filter { undef; } -sub is_maildir_basename ($) { - my ($bn) = @_; - return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($bn =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - 1; -} - -sub is_maildir_path ($) { - my ($path) = @_; - my @p = split(m!/+!, $path); - (is_maildir_basename($p[-1]) && -f $path) ? 
1 : 0; -} - sub eml_from_path ($) { my ($path) = @_; - if (open my $fh, '<', $path) { + if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) { + return unless -f $fh; # no FIFOs or directories my $str = do { local $/; <$fh> } or return; PublicInbox::Eml->new(\$str); } else { # ENOENT is common with Maildir @@ -145,27 +131,30 @@ sub eml_from_path ($) { } } +sub _each_maildir_fn { + my ($fn, $im, $self) = @_; + if ($fn =~ /:2,([A-Za-z]*)\z/) { + my $fl = $1; + return if $fl =~ /[DT]/; # no Drafts or Trash for public + } + my $eml = eml_from_path($fn) or return; + if ($self && (my $filter = $self->filter($im))) { + my $ret = $filter->scrub($eml) or return; + return if $ret == REJECT(); + $eml = $ret; + } + $im->add($eml); +} + sub import_maildir { my ($self, $dir) = @_; - my $im = $self->importer(1); - foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } - foreach my $sub (qw(cur new)) { - opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; - while (defined(my $fn = readdir($dh))) { - next unless is_maildir_basename($fn); - my $mime = eml_from_path("$dir/$fn") or next; - - if (my $filter = $self->filter($im)) { - my $ret = $filter->scrub($mime) or return; - return if $ret == REJECT(); - $mime = $ret; - } - $im->add($mime); - } - } + my $im = $self->importer(1); + my @self = $self->filter($im) ? 
($self) : (); + PublicInbox::MdirReader::maildir_each_file($dir, \&_each_maildir_fn, + $im, @self); $im->done; } diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index c6a0e7a8..e0ff676d 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -2,18 +2,36 @@ # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Maildirs for now, MH eventually +# ref: https://cr.yp.to/proto/maildir.html +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::MdirReader; use strict; use v5.10.1; +# returns Maildir flags from a basename ('' for no flags, undef for invalid) +sub maildir_basename_flags { + my (@f) = split(/:/, $_[0], -1); + return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.'); + $f[1] // return ''; # "new" + $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur" +} + +# same as above, but for full path name +sub maildir_path_flags { + my ($f) = @_; + my $i = rindex($f, '/'); + $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef; +} + sub maildir_each_file ($$;@) { my ($dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + while (defined(my $bn = readdir($dh))) { + maildir_basename_flags($bn) // next; + $cb->($pfx.$bn, @arg); } } } diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1835fa0e..a4302162 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -2,12 +2,13 @@ # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # ref: https://cr.yp.to/proto/maildir.html -# http://wiki2.dovecot.org/MailboxFormat/Maildir +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::Watch; use strict; use v5.10.1; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::MdirReader; use PublicInbox::Filter::Base qw(REJECT); 
use PublicInbox::Spamcheck; use PublicInbox::Sigfd; @@ -207,7 +208,8 @@ sub import_eml ($$$) { sub _try_path { my ($self, $path) = @_; - return unless PublicInbox::InboxWritable::is_maildir_path($path); + my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return; + return if $fl =~ /[DT]/; # no Drafts or Trash if ($path !~ $self->{mdre}) { warn "unrecognized path: $path\n"; return; diff --git a/t/mdir_reader.t b/t/mdir_reader.t new file mode 100644 index 00000000..51b38af4 --- /dev/null +++ b/t/mdir_reader.t @@ -0,0 +1,22 @@ +#!perl -w +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use PublicInbox::TestCommon; +require_ok 'PublicInbox::MdirReader'; +*maildir_basename_flags = \&PublicInbox::MdirReader::maildir_basename_flags; +*maildir_path_flags = \&PublicInbox::MdirReader::maildir_path_flags; + +is(maildir_basename_flags('foo'), '', 'new valid name accepted'); +is(maildir_basename_flags('foo:2,'), '', 'cur valid name accepted'); +is(maildir_basename_flags('foo:2,bar'), 'bar', 'flags name accepted'); +is(maildir_basename_flags('.foo:2,bar'), undef, 'no hidden files'); +is(maildir_basename_flags('fo:o:2,bar'), undef, 'no extra colon'); +is(maildir_path_flags('/path/to/foo:2,S'), 'S', 'flag returned for path'); +is(maildir_path_flags('/path/to/.foo:2,S'), undef, 'no hidden paths'); +is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path'); + +# not sure if there's a better place for eml_from_path +use_ok 'PublicInbox::InboxWritable', qw(eml_from_path); +is(eml_from_path('.'), undef, 'eml_from_path fails on directory'); + +done_testing;
next prev parent reply other threads:[~2021-02-09 8:09 UTC|newest] Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-02-09 8:09 [PATCH 00/11] Maildir code consolidation, test updates Eric Wong 2021-02-09 8:09 ` [PATCH 01/11] t/thread-index-gap.t: avoid unnecessary map Eric Wong 2021-02-09 8:09 ` [PATCH 02/11] test_common: disable fsync on the CLI where possible Eric Wong 2021-02-09 8:09 ` [PATCH 03/11] t/cgi.t: modernizations and style updates Eric Wong 2021-02-09 8:09 ` [PATCH 04/11] git: ->qx: respect caller's $/ in array context Eric Wong 2021-02-09 8:09 ` [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier Eric Wong 2021-02-09 8:09 ` [PATCH 06/11] t/run.perl: fix for >128 tests Eric Wong 2021-02-09 8:09 ` Eric Wong [this message] 2021-02-09 8:09 ` [PATCH 08/11] lei q: prefix --alert ops with ':' instead of '-' Eric Wong 2021-02-09 8:09 ` [PATCH 09/11] t/run.perl: drop Cwd dependency Eric Wong 2021-02-09 8:09 ` [PATCH 10/11] lei: replace "I:"-prefixed info messages with "#" Eric Wong 2021-02-09 8:09 ` [PATCH 11/11] tests|lei: fixes for TEST_RUN_MODE=0 and lei oneshot Eric Wong
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style List information: https://public-inbox.org/README * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210209080937.4678-8-e@80x24.org \ --to=e@80x24.org \ --cc=meta@public-inbox.org \ --subject='Re: [PATCH 07/11] use MdirReader in -watch and InboxWritable' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
user/dev discussion of public-inbox itself This inbox may be cloned and mirrored by anyone: git clone --mirror https://public-inbox.org/meta git clone --mirror http://czquwvybam4bgbro.onion/meta git clone --mirror http://hjrcffqmbrq6wope.onion/meta git clone --mirror http://ou63pmih66umazou.onion/meta # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \ meta@public-inbox.org public-inbox-index meta Example config snippet for mirrors. Newsgroups are available over NNTP: nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta nntp://news.gmane.io/gmane.mail.public-inbox.general note: .onion URLs require Tor: https://www.torproject.org/ code repositories for project(s) associated with this inbox: https://80x24.org/public-inbox.git AGPL code for this site: git clone https://public-inbox.org/public-inbox.git