1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
| | # Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# MH reader, based on Lib/mailbox.py in cpython source
package PublicInbox::MHreader;
use v5.12;
use PublicInbox::InboxWritable qw(eml_from_path);
use PublicInbox::OnDestroy;
use PublicInbox::IO qw(try_cat);
use PublicInbox::MdirSort;
use Carp qw(carp);
use autodie qw(chdir closedir opendir);
my %FL2OFF = ( # mh_sequences key => our keyword
replied => 0,
flagged => 1,
unseen => 2, # negate
);
my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)
sub new {
my ($cls, $dir, $cwdfh) = @_;
if (substr($dir, -1) ne '/') { # TODO: do this earlier
carp "W: appending `/' to `$dir' (fix caller)\n";
$dir .= '/';
}
bless { dir => $dir, cwdfh => $cwdfh }, $cls;
}
sub read_mh_sequences ($) { # caller must chdir($self->{dir})
my ($self) = @_;
my ($fl, $off, @n);
my @seq = ('', '', '');
for (split /\n+/s, try_cat('.mh_sequences')) {
($fl, @n) = split /[: \t]+/;
$off = $FL2OFF{$fl} // do { warn <<EOM;
W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
EOM
next;
};
@n = grep /\A[0-9]+\z/s, @n; # don't stat, yet
if (@n) {
@n = sort { $b <=> $a } @n; # to avoid resize
my $buf = '';
vec($buf, $_, 1) = 1 for @n;
$seq[$off] = $buf;
}
}
\@seq;
}
sub mh_each_file {
my ($self, $efcb, @arg) = @_;
opendir(my $dh, my $dir = $self->{dir});
my $restore = on_destroy \&chdir, $self->{cwdfh};
chdir($dh);
my $sort = $self->{sort};
if (defined $sort && "@$sort" ne 'none') {
my @sort = map {
my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
# sorting by name alphabetically makes no sense for MH:
for my $k (@tmp) {
s/\A(\-|\+|)(?:name|)\z/$1sequence/;
}
@tmp;
} @$sort;
my @n = grep /\A[0-9]+\z/s, readdir $dh;
mdir_sort \@n, \@sort;
$efcb->($dir, $_, $self, @arg) for @n;
} else {
while (readdir $dh) { # perl v5.12+ to set $_ on readdir
$efcb->($dir, $_, $self, @arg) if /\A[0-9]+\z/s;
}
}
closedir $dh; # may die
}
sub kw_for ($$) {
my ($self, $n) = @_;
my $seq = $self->{mh_seq} //= read_mh_sequences($self);
my @kw = map { vec($seq->[$_], $n, 1) ? $OFF2KW[$_] : () } (0, 1);
vec($seq->[2], $n, 1) or push @kw, 'seen';
\@kw;
}
sub _file2eml { # mh_each_file / mh_read_one cb
my ($dir, $n, $self, $ucb, @arg) = @_;
my $eml = eml_from_path($n);
$ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml;
}
sub mh_each_eml {
my ($self, $ucb, @arg) = @_;
mh_each_file($self, \&_file2eml, $ucb, @arg);
}
sub mh_read_one {
my ($self, $n, $ucb, @arg) = @_;
my $restore = on_destroy \&chdir, $self->{cwdfh};
chdir(my $dir = $self->{dir});
_file2eml($dir, $n, $self, $ucb, @arg);
}
1;
|