public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 2981b058c1cce43121d5a223b3f148e399618ac9 3046 bytes (raw)
$ git show HEAD:lib/PublicInbox/MdirReader.pm	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
 
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# Maildirs only (PublicInbox::MHreader exists, now)
# ref: https://cr.yp.to/proto/maildir.html
#	https://wiki2.dovecot.org/MailboxFormat/Maildir
package PublicInbox::MdirReader;
use strict;
use v5.10.1;
use PublicInbox::InboxWritable qw(eml_from_path);
use PublicInbox::SHA qw(sha256_hex);

# returns Maildir flags from a basename ('' for no flags, undef for invalid)
sub maildir_basename_flags {
	my (@f) = split(/:/, $_[0], -1);
	return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.');
	$f[1] // return ''; # "new"
	$f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur"
}

# same as above, but for full path name
sub maildir_path_flags {
	my ($f) = @_;
	my $i = rindex($f, '/');
	$i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
}

sub shard_ok ($$$) {
	my ($bn, $mod, $shard) = @_;
	# can't get dirent.d_ino w/ pure Perl readdir, so we extract
	# the OID if it looks like one instead of doing stat(2)
	my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn);
	my $recno = hex(substr($hex, 0, 8));
	($recno % $mod) == $shard;
}

sub maildir_each_file {
	my ($self, $dir, $cb, @arg) = @_;
	$dir .= '/' unless substr($dir, -1) eq '/';
	my ($mod, $shard) = @{$self->{shard_info} // []};
	for my $d (qw(new/ cur/)) {
		my $pfx = $dir.$d;
		opendir my $dh, $pfx or next;
		while (defined(my $bn = readdir($dh))) {
			my $fl = maildir_basename_flags($bn) // next;
			next if defined($mod) && !shard_ok($bn, $mod, $shard);
			next if index($fl, 'T') >= 0; # no Trashed messages
			$cb->($pfx.$bn, $fl, @arg);
		}
	}
}

my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded',
	R => 'answered', S => 'seen');

sub maildir_each_eml {
	my ($self, $dir, $cb, @arg) = @_;
	$dir .= '/' unless substr($dir, -1) eq '/';
	my ($mod, $shard) = @{$self->{shard_info} // []};
	my $pfx = $dir . 'new/';
	if (opendir(my $dh, $pfx)) {
		while (defined(my $bn = readdir($dh))) {
			next if substr($bn, 0, 1) eq '.';
			my @f = split(/:/, $bn, -1);

			# mbsync and offlineimap both use "2," in "new/"
			next if ($f[1] // '2,') ne '2,' || defined($f[2]);

			next if defined($mod) && !shard_ok($bn, $mod, $shard);
			my $f = $pfx.$bn;
			my $eml = eml_from_path($f) or next;
			$cb->($f, [], $eml, @arg);
		}
	}
	$pfx = $dir . 'cur/';
	opendir my $dh, $pfx or return;
	while (defined(my $bn = readdir($dh))) {
		my $fl = maildir_basename_flags($bn) // next;
		next if index($fl, 'T') >= 0;
		next if defined($mod) && !shard_ok($bn, $mod, $shard);
		my $f = $pfx.$bn;
		my $eml = eml_from_path($f) or next;
		my @kw = sort(map { $c2kw{$_} // () } split(//, $fl));
		$cb->($f, \@kw, $eml, @arg);
	}
}

sub new { bless {}, __PACKAGE__ }

sub flags2kw ($) {
	if (wantarray) {
		my @unknown;
		my %kw;
		for (split(//, $_[0])) {
			my $k = $c2kw{$_};
			if (defined($k)) {
				$kw{$k} = 1;
			} else {
				push @unknown, $_;
			}
		}
		(\%kw, \@unknown);
	} else {
		[ sort(map { $c2kw{$_} // () } split(//, $_[0])) ];
	}
}

1;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git