about summary refs log tree commit homepage
path: root/lib/PublicInbox/MdirReader.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/MdirReader.pm')
-rw-r--r--lib/PublicInbox/MdirReader.pm108
1 files changed, 108 insertions, 0 deletions
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
new file mode 100644
index 00000000..2981b058
--- /dev/null
+++ b/lib/PublicInbox/MdirReader.pm
@@ -0,0 +1,108 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Maildirs only (PublicInbox::MHreader exists, now)
+# ref: https://cr.yp.to/proto/maildir.html
+#        https://wiki2.dovecot.org/MailboxFormat/Maildir
+package PublicInbox::MdirReader;
+use strict;
+use v5.10.1;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::SHA qw(sha256_hex);
+
+# returns Maildir flags from a basename ('' for no flags, undef for invalid)
+sub maildir_basename_flags {
+        my (@f) = split(/:/, $_[0], -1);
+        return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
+        $f[1] // return ''; # "new"
+        $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
+}
+
+# same as above, but for full path name
+sub maildir_path_flags {
+        my ($f) = @_;
+        my $i = rindex($f, '/');
+        $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
+}
+
+sub shard_ok ($$$) {
+        my ($bn, $mod, $shard) = @_;
+        # can't get dirent.d_ino w/ pure Perl readdir, so we extract
+        # the OID if it looks like one instead of doing stat(2)
+        my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn);
+        my $recno = hex(substr($hex, 0, 8));
+        ($recno % $mod) == $shard;
+}
+
+sub maildir_each_file {
+        my ($self, $dir, $cb, @arg) = @_;
+        $dir .= '/' unless substr($dir, -1) eq '/';
+        my ($mod, $shard) = @{$self->{shard_info} // []};
+        for my $d (qw(new/ cur/)) {
+                my $pfx = $dir.$d;
+                opendir my $dh, $pfx or next;
+                while (defined(my $bn = readdir($dh))) {
+                        my $fl = maildir_basename_flags($bn) // next;
+                        next if defined($mod) && !shard_ok($bn, $mod, $shard);
+                        next if index($fl, 'T') >= 0; # no Trashed messages
+                        $cb->($pfx.$bn, $fl, @arg);
+                }
+        }
+}
+
+my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded',
+        R => 'answered', S => 'seen');
+
+sub maildir_each_eml {
+        my ($self, $dir, $cb, @arg) = @_;
+        $dir .= '/' unless substr($dir, -1) eq '/';
+        my ($mod, $shard) = @{$self->{shard_info} // []};
+        my $pfx = $dir . 'new/';
+        if (opendir(my $dh, $pfx)) {
+                while (defined(my $bn = readdir($dh))) {
+                        next if substr($bn, 0, 1) eq '.';
+                        my @f = split(/:/, $bn, -1);
+
+                        # mbsync and offlineimap both use "2," in "new/"
+                        next if ($f[1] // '2,') ne '2,' || defined($f[2]);
+
+                        next if defined($mod) && !shard_ok($bn, $mod, $shard);
+                        my $f = $pfx.$bn;
+                        my $eml = eml_from_path($f) or next;
+                        $cb->($f, [], $eml, @arg);
+                }
+        }
+        $pfx = $dir . 'cur/';
+        opendir my $dh, $pfx or return;
+        while (defined(my $bn = readdir($dh))) {
+                my $fl = maildir_basename_flags($bn) // next;
+                next if index($fl, 'T') >= 0;
+                next if defined($mod) && !shard_ok($bn, $mod, $shard);
+                my $f = $pfx.$bn;
+                my $eml = eml_from_path($f) or next;
+                my @kw = sort(map { $c2kw{$_} // () } split(//, $fl));
+                $cb->($f, \@kw, $eml, @arg);
+        }
+}
+
+sub new { bless {}, __PACKAGE__ }
+
+sub flags2kw ($) {
+        if (wantarray) {
+                my @unknown;
+                my %kw;
+                for (split(//, $_[0])) {
+                        my $k = $c2kw{$_};
+                        if (defined($k)) {
+                                $kw{$k} = 1;
+                        } else {
+                                push @unknown, $_;
+                        }
+                }
+                (\%kw, \@unknown);
+        } else {
+                [ sort(map { $c2kw{$_} // () } split(//, $_[0])) ];
+        }
+}
+
+1;