public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 16e505a25fc6c04708edf1464ebbb0ac48e2117d 2714 bytes (raw)
$ git show HEAD:lib/PublicInbox/MHreader.pm	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
 
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# MH reader, based on Lib/mailbox.py in cpython source
package PublicInbox::MHreader;
use v5.12;
use PublicInbox::InboxWritable qw(eml_from_path);
use PublicInbox::OnDestroy;
use PublicInbox::IO qw(try_cat);
use PublicInbox::MdirSort;
use Carp qw(carp);
use autodie qw(chdir closedir opendir);

# Maps a sequence name found in `.mh_sequences' to its slot in the
# 3-element array built by read_mh_sequences().
my %FL2OFF = ( # mh_sequences key => offset into the sequence array
	replied => 0,
	flagged => 1,
	unseen => 2, # negate
);
# Keyword names reported by kw_for() for slots 0 and 1.  Slot 2 has no
# entry here because it is negated: kw_for() reports `seen' only when a
# message number is NOT set in the `unseen' slot.
my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)

# Constructor.  $dir is the MH folder path and is stored with a trailing
# slash (one is appended, with a warning, when the caller omitted it);
# $cwdfh is stored as-is for the chdir-restoring on_destroy calls made
# by mh_each_file and mh_read_one.
sub new {
	my ($class, $dir, $cwdfh) = @_;
	unless ($dir =~ m{/\z}) { # TODO: do this earlier
		carp "W: appending `/' to `$dir' (fix caller)\n";
		$dir .= '/';
	}
	my %self = (dir => $dir, cwdfh => $cwdfh);
	bless \%self, $class;
}

# Parse `.mh_sequences' in the current directory into three bit vectors.
#
# Returns an arrayref indexed by the offsets in %FL2OFF ([0] replied,
# [1] flagged, [2] unseen).  Bit N of a vector is set (see vec()) when
# message number N is listed in that sequence; sequences which do not
# appear in the file stay as empty strings.
#
# Both single message numbers ("7") and inclusive ranges ("3-5", the
# compressed form written by nmh and Python's mailbox.MH) are understood;
# any other token, and any range whose end precedes its start, is ignored.
# A line naming a sequence we do not track only triggers a warning.
sub read_mh_sequences ($) { # caller must chdir($self->{dir})
	my ($self) = @_;
	my @seq = ('', '', '');
	for my $line (split /\n+/s, try_cat('.mh_sequences')) {
		my ($fl, @spec) = split /[: \t]+/, $line;
		my $off = $FL2OFF{$fl} // do { warn <<EOM;
W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
EOM
			next;
		};
		# collect [first, last] pairs; don't stat, yet
		my @range;
		for my $s (@spec) {
			$s =~ /\A([0-9]+)(?:-([0-9]+))?\z/ or next;
			# "+ 0" forces a numeric range below even for
			# zero-padded tokens such as "007"
			my ($lo, $hi) = ($1 + 0, ($2 // $1) + 0);
			push @range, [ $lo, $hi ] if $lo <= $hi;
		}
		@range or next;
		# touch the highest bit first to avoid resizing $buf
		@range = sort { $b->[1] <=> $a->[1] } @range;
		my $buf = '';
		vec($buf, $range[0]->[1], 1) = 1;
		for my $r (@range) {
			vec($buf, $_, 1) = 1 for $r->[0] .. $r->[1];
		}
		$seq[$off] = $buf;
	}
	\@seq;
}

# Invoke $efcb->($dir, $n, $self, @arg) for every entry of the MH folder
# whose name consists solely of digits (i.e. a message file).
#
# The process chdir()s into the folder before any callback runs, so a
# callback may open `$n' relative to the cwd.  $restore is created with
# on_destroy(\&chdir, $self->{cwdfh}) beforehand; presumably that switches
# back to the previous directory once $restore goes out of scope (see
# PublicInbox::OnDestroy).
#
# When $self->{sort} is defined and is not `none', all message numbers
# are collected and ordered via mdir_sort() before the first callback;
# otherwise they are handled in readdir order.  $self->{sort} itself is
# never modified.  opendir/chdir/closedir are covered by autodie.
sub mh_each_file {
	my ($self, $efcb, @arg) = @_;
	opendir(my $dh, my $dir = $self->{dir});
	my $restore = on_destroy(\&chdir, $self->{cwdfh});
	chdir($dh);
	my $sort = $self->{sort};
	if (defined $sort && "@$sort" ne 'none') {
		my @sort = map {
			my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
			# sorting by name alphabetically makes no sense for MH.
			# The substitution must be bound to $k (our copy):
			# $_ here aliases the element of $self->{sort}
			for my $k (@tmp) {
				$k =~ s/\A(\-|\+|)(?:name|)\z/$1sequence/;
			}
			@tmp;
		} @$sort;
		my @n = grep /\A[0-9]+\z/s, readdir $dh;
		mdir_sort(\@n, \@sort);
		for my $n (@n) {
			$efcb->($dir, $n, $self, @arg);
		}
	} else {
		# a lexical (not the global $_) so the caller's $_ survives
		while (defined(my $n = readdir $dh)) {
			$efcb->($dir, $n, $self, @arg) if $n =~ /\A[0-9]+\z/s;
		}
	}
	closedir $dh; # may die
}

# Return an arrayref of keywords for message number $n.
# The bit vectors from read_mh_sequences() are loaded on first use and
# cached in $self->{mh_seq}; that first call reads `.mh_sequences' from
# the current directory.
sub kw_for ($$) {
	my ($self, $n) = @_;
	my $seq = $self->{mh_seq};
	$seq = $self->{mh_seq} = read_mh_sequences($self) unless defined $seq;
	my @kw;
	for my $off (0, 1) {
		push @kw, $OFF2KW[$off] if vec($seq->[$off], $n, 1);
	}
	# slot 2 (`unseen') is negated: not being listed there means `seen'
	push @kw, 'seen' unless vec($seq->[2], $n, 1);
	\@kw;
}

# Load message file $n (relative to the cwd) and, if that yielded a true
# value, pass it along with its keywords to the user callback:
#   $ucb->($dir, $n, \@keywords, $eml, @arg)
sub _file2eml { # mh_each_file / mh_read_one cb
	my ($dir, $msgnum, $self, $user_cb, @rest) = @_;
	my $eml = eml_from_path($msgnum);
	$eml and $user_cb->($dir, $msgnum, kw_for($self, $msgnum), $eml, @rest);
}

# Like mh_each_file(), but every message file is loaded first: $ucb is
# invoked as $ucb->($dir, $n, \@keywords, $eml, @arg) for each message
# that could be loaded.
sub mh_each_eml {
	my $self = shift;
	my ($ucb, @arg) = @_;
	mh_each_file($self, \&_file2eml, $ucb, @arg);
}

# Load the single message numbered $n from the MH folder and hand it to
# $ucb via _file2eml().  The process chdir()s into the folder first;
# $restore is presumably what switches back to $self->{cwdfh} when this
# sub returns (see PublicInbox::OnDestroy).
sub mh_read_one {
	my ($self, $msgnum, $user_cb, @rest) = @_;
	my $dir = $self->{dir};
	# set up before chdir, which may die under autodie
	my $restore = on_destroy(\&chdir, $self->{cwdfh});
	chdir($dir);
	_file2eml($dir, $msgnum, $self, $user_cb, @rest);
}

1;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git