user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] lei: support reading MH for convert+import+index
@ 2023-12-16 13:09 Eric Wong
  2023-12-16 16:15 ` Konstantin Ryabitsev
  2023-12-29 18:05 ` [PATCH v2] " Eric Wong
  0 siblings, 2 replies; 5+ messages in thread
From: Eric Wong @ 2023-12-16 13:09 UTC (permalink / raw)
  To: meta

The MH format is widely-supported and used by various MUAs such
as mutt and sylpheed, and a MH-like format is used by mlmmj for
archives, as well.  Locking implementations for writes are
inconsistent, so this commit doesn't support writes, yet.

inotify|EVFILT_VNODE watches aren't supported, yet, either.
---
 MANIFEST                       |   3 +
 lib/PublicInbox/LEI.pm         |  13 ++--
 lib/PublicInbox/LeiConvert.pm  |   5 ++
 lib/PublicInbox/LeiImport.pm   |  23 +++++++
 lib/PublicInbox/LeiImportKw.pm |   2 +-
 lib/PublicInbox/LeiIndex.pm    |   2 +-
 lib/PublicInbox/LeiInput.pm    |  52 +++++++++++++---
 lib/PublicInbox/LeiMailSync.pm |  39 ++++++++----
 lib/PublicInbox/LeiToMail.pm   |   5 ++
 lib/PublicInbox/MHreader.pm    | 103 +++++++++++++++++++++++++++++++
 lib/PublicInbox/MdirReader.pm  |   2 +-
 lib/PublicInbox/MdirSort.pm    |  46 ++++++++++++++
 lib/PublicInbox/TestCommon.pm  |  22 ++++---
 t/mh_reader.t                  | 108 +++++++++++++++++++++++++++++++++
 14 files changed, 392 insertions(+), 33 deletions(-)
 create mode 100644 lib/PublicInbox/MHreader.pm
 create mode 100644 lib/PublicInbox/MdirSort.pm
 create mode 100644 t/mh_reader.t

diff --git a/MANIFEST b/MANIFEST
index e22674b7..8bcc3179 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -293,6 +293,7 @@ lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
 lib/PublicInbox/MDA.pm
+lib/PublicInbox/MHreader.pm
 lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
 lib/PublicInbox/MailDiff.pm
@@ -302,6 +303,7 @@ lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MboxLock.pm
 lib/PublicInbox/MboxReader.pm
 lib/PublicInbox/MdirReader.pm
+lib/PublicInbox/MdirSort.pm
 lib/PublicInbox/MiscIdx.pm
 lib/PublicInbox/MiscSearch.pm
 lib/PublicInbox/MsgIter.pm
@@ -543,6 +545,7 @@ t/mda-mime.eml
 t/mda.t
 t/mda_filter_rubylang.t
 t/mdir_reader.t
+t/mh_reader.t
 t/mid.t
 t/mime.t
 t/miscsearch.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 17431518..e0cfd55a 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -267,7 +267,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 	'one-time import/update from URL or filesystem',
 	qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
 	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
-	commit-delay=i),
+	commit-delay=i sort|s:s@),
 	@net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
 	'forget sync information for a mail folder', @c_opt ],
@@ -280,7 +280,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 'convert' => [ 'LOCATION...|--stdin',
 	'one-time conversion from URL or filesystem to another format',
 	qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!
-		rsyncable),
+		rsyncable sort|s:s@),
 	@net_opt, @c_opt ],
 'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
 	"use a patch to generate a query for `lei q --stdin'",
@@ -321,6 +321,9 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo',
 			'specify message input format' ];
 my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
+my $sort_out = [ 'VAL|received|relevance|docid',
+		"order of results is `--output'-dependent"];
+my $sort_in = [ 'sequence|mtime|size', 'sort input (format-dependent)' ];
 
 # we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
 my %OPTDESC = (
@@ -428,8 +431,10 @@ my %OPTDESC = (
 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
 'offset=i' => ['OFF', 'search result offset (default: 0)'],
 
-'sort|s=s' => [ 'VAL|received|relevance|docid',
-		"order of results is `--output'-dependent"],
+'sort|s=s	q' => $sort_out,
+'sort|s=s	lcat' => $sort_out,
+'sort|s:s@	convert' => $sort_in,
+'sort|s:s@	import' => $sort_in,
 'reverse|r' => 'reverse search results', # like sort(1)
 
 'boost=i' => 'increase/decrease priority of results (default: 0)',
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8f628562..17a952f2 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -28,6 +28,11 @@ sub input_maildir_cb {
 	$self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
+sub input_mh_cb { # MH reader callback: hand each message to $self->{wcb}
+	my ($dn, $bn, $kw, $eml, $self) = @_;
+	$self->{wcb}->(undef, { kw => $kw }, $eml); # $dn and $bn are unused
+}
+
 sub process_inputs { # via wq_do
 	my ($self) = @_;
 	local $PublicInbox::DS::in_loop = 0; # force synchronous awaitpid
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index c2552bf0..5521188c 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -53,6 +53,29 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
 	}
 }
 
+sub input_mh_cb { # MH reader callback: ($mhdir, $n, $kw, $eml, $self)
+	my ($mhdir, $n, $kw, $eml, $self) = @_;
+	substr($mhdir, 0, 0) = 'mh:'; # add prefix
+	my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
+	my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
+	my @oidbin = $lms ? $lms->num_oidbin($mhdir, $n) : ();
+	@oidbin > 1 and warn("W: $mhdir/$n not unique:\n",
+				map { "\t".unpack('H*', $_)."\n" } @oidbin);
+	my @docids = sort { $a <=> $b } uniqstr
+			map { $lse->over->oidbin_exists($_) } @oidbin;
+	if (scalar @docids) { # already known: stop unless kw_changed is true
+		$lse->kw_changed(undef, $kw, \@docids) or return;
+	}
+	if (defined $eml) {
+		my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+		$vmd->{sync_info} = [ $mhdir, $n + 0 ] if $self->{-mail_sync};
+		$self->input_eml_cb($eml, $vmd);
+	}
+	# TODO:
+	# elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
+	#	$ikw->wq_io_do('ck_update_kw', [], $mhdir, $n, $kw);
+}
+
 sub input_net_cb { # imap_each / nntp_each
 	my ($uri, $uid, $kw, $eml, $self) = @_;
 	if (defined $eml) {
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
index 4b8e69fb..765e23cd 100644
--- a/lib/PublicInbox/LeiImportKw.pm
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -36,7 +36,7 @@ sub ipc_atfork_child {
 sub ck_update_kw { # via wq_io_do
 	my ($self, $url, $uid, $kw) = @_;
 	my @oidbin = $self->{-lms_rw}->num_oidbin($url, $uid);
-	my $uid_url = "$url/;UID=$uid";
+	my $uid_url = index($url, 'mh:') == 0 ? $url.$uid : "$url/;UID=$uid";
 	@oidbin > 1 and warn("W: $uid_url not unique:\n",
 				map { "\t".unpack('H*', $_)."\n" } @oidbin);
 	my @docids = sort { $a <=> $b } uniqstr
diff --git a/lib/PublicInbox/LeiIndex.pm b/lib/PublicInbox/LeiIndex.pm
index b3f3e1a0..0e329e58 100644
--- a/lib/PublicInbox/LeiIndex.pm
+++ b/lib/PublicInbox/LeiIndex.pm
@@ -35,7 +35,7 @@ sub lei_index {
 
 no warnings 'once';
 no strict 'refs';
-for my $m (qw(pmdir_cb input_net_cb)) {
+for my $m (qw(pmdir_cb input_net_cb input_mh_cb)) {
 	*$m = PublicInbox::LeiImport->can($m);
 }
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index daba9a8e..947a7a79 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -69,6 +69,11 @@ sub input_maildir_cb {
 	$self->input_eml_cb($eml);
 }
 
+sub input_mh_cb { # MH reader callback; $dn, $n and $kw are not used here
+	my ($dn, $n, $kw, $eml, $self) = @_;
+	$self->input_eml_cb($eml);
+}
+
 sub input_net_cb { # imap_each, nntp_each cb
 	my ($url, $uid, $kw, $eml, $self) = @_;
 	$self->input_eml_cb($eml);
@@ -190,7 +195,7 @@ sub input_path_url {
 		$ifmt = lc($1);
 	} elsif ($input =~ /\.(?:patch|eml)\z/i) {
 		$ifmt = 'eml';
-	} elsif (-f $input && $input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z}) {
+	} elsif ($input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z} && -f $input) {
 		my $bn = $1;
 		my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
 		return if index($fl, 'T') >= 0;
@@ -204,6 +209,10 @@ sub input_path_url {
 	my $devfd = $lei->path_to_fd($input) // return;
 	if ($devfd >= 0) {
 		$self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
+	} elsif ($devfd < 0 && $input =~ m{\A(.+/)([0-9]+)\z} && -f $input) {
+		my ($dn, $n) = ($1, $2);
+		my $mhr = PublicInbox::MHreader->new($dn, $lei->{3});
+		$mhr->mh_read_one($n, $self->can('input_mh_cb'), $self);
 	} elsif (-f $input && $ifmt eq 'eml') {
 		open my $fh, '<', $input or
 					return $lei->fail("open($input): $!");
@@ -231,6 +240,10 @@ sub input_path_url {
 						$self->can('input_maildir_cb'),
 						$self, @args);
 		}
+	} elsif (-d _ && $ifmt eq 'mh') {
+		my $mhr = PublicInbox::MHreader->new($input.'/', $lei->{3});
+		$mhr->{sort} = $lei->{opt}->{sort};
+		$mhr->mh_each_eml($self->can('input_mh_cb'), $self, @args);
 	} elsif (-d _ && $ifmt =~ /\A(?:v1|v2)\z/) {
 		my $ibx = PublicInbox::Inbox->new({inboxdir => $input});
 		each_ibx_eml($self, $ibx, @args);
@@ -354,13 +367,15 @@ sub prepare_inputs { # returns undef on error
 				PublicInbox::MboxReader->reads($ifmt) or return
 					$lei->fail("$ifmt not supported");
 			} elsif (-d $input_path) { # TODO extindex
-				$ifmt =~ /\A(?:maildir|v1|v2|extindex)\z/ or
+				$ifmt =~ /\A(?:maildir|mh|v1|v2|extindex)\z/ or
 					return$lei->fail("$ifmt not supported");
 				$input = $input_path;
 				add_dir $lei, $istate, $ifmt, \$input;
-			} elsif ($self->{missing_ok} && !-e _) {
+			} elsif ($self->{missing_ok} &&
+					$ifmt =~ /\A(?:maildir|mh)\z/ &&
+					!-e $input_path) {
 				# for "lei rm-watch" on missing Maildir
-				$may_sync and $input = 'maildir:'.
+				$may_sync and $input = "$ifmt:".
 						$lei->abs_path($input_path);
 			} else {
 				my $m = "Unable to handle $input";
@@ -373,7 +388,7 @@ sub prepare_inputs { # returns undef on error
 $input is `eml', not --in-format=$in_fmt
 
 			push @{$sync->{no}}, $input if $sync;
-		} elsif (-f $input && $input =~ m{\A(.+)/(new|cur)/([^/]+)\z}) {
+		} elsif ($input =~ m{\A(.+)/(new|cur)/([^/]+)\z} && -f $input) {
 			# single file in a Maildir
 			my ($mdir, $nc, $bn) = ($1, $2, $3);
 			my $other = $mdir . ($nc eq 'new' ? '/cur' : '/new');
@@ -385,12 +400,24 @@ $input is `eml', not --in-format=$in_fmt
 
 			if ($sync) {
 				$input = $lei->abs_path($mdir) . "/$nc/$bn";
-				push @{$sync->{ok}}, $input if $sync;
+				push @{$sync->{ok}}, $input;
 			}
 			require PublicInbox::MdirReader;
 		} else {
 			my $devfd = $lei->path_to_fd($input) // return;
-			if ($devfd >= 0 || -f $input || -p _) {
+			if ($devfd < 0 && $input =~ m{\A(.+)/([0-9]+)\z} &&
+					-f $input) { # single file in MH dir
+				my ($mh, $n) = ($1, $2);
+				lc($in_fmt//'eml') eq 'eml' or
+						return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+				if ($sync) {
+					$input = $lei->abs_path($mh)."/$n";
+					push @{$sync->{ok}}, $input;
+				}
+				require PublicInbox::MHreader;
+			} elsif ($devfd >= 0 || -f $input || -p _) {
 				push @{$sync->{no}}, $input if $sync;
 				push @f, $input;
 			} elsif (-d "$input/new" && -d "$input/cur") {
@@ -401,10 +428,13 @@ $input is `eml', not --in-format=$in_fmt
 				add_dir $lei, $istate, 'v1', \$input;
 			} elsif (-e "$input/ei.lock") {
 				add_dir $lei, $istate, 'extindex', \$input;
+			} elsif (-f "$input/.mh_sequences") {
+				add_dir $lei, $istate, 'mh', \$input;
 			} elsif ($self->{missing_ok} && !-e $input) {
 				if ($lei->{cmd} eq 'p2q') {
 					# will run "git format-patch"
 				} elsif ($may_sync) { # for lei rm-watch
+					# FIXME: support MH, here
 					$input = 'maildir:'.
 						$lei->abs_path($input);
 				}
@@ -446,6 +476,14 @@ $input is `eml', not --in-format=$in_fmt
 			$lei->refresh_watches;
 		}
 	}
+	if (my $mh = $istate->{mh}) {
+		require PublicInbox::MHreader;
+		grep(!m!\Amh:!i, @$mh) and die "BUG: @$mh (no pfx)";
+		if ($may_sync && $lei->{sto}) {
+			$lei->lms(1)->lms_write_prepare->add_folders(@$mh);
+			# $lei->refresh_watches; TODO
+		}
+	}
 	require PublicInbox::ExtSearch if $istate->{extindex};
 	$self->{inputs} = $inputs;
 }
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 17254a82..8d00d1fa 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -435,15 +435,24 @@ sub folders {
 	map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
+sub blob_mismatch ($$$) { # undef if $$rawref hashes to $oidhex, else warns
+	my ($f, $oidhex, $rawref) = @_;
+	my $sha = $HEXLEN2SHA{length($oidhex)}; # looked up by hex digest length
+	my $got = git_sha($sha, $rawref)->hexdigest;
+	$got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
+}
+
 sub local_blob {
 	my ($self, $oidhex, $vrfy) = @_;
 	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $oidbin = pack('H*', $oidhex);
+
 	my $b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-	$b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+	$b2n->bind_param(1, $oidbin, SQL_BLOB);
 	$b2n->execute;
 	while (my ($d, $n) = $b2n->fetchrow_array) {
 		substr($d, 0, length('maildir:')) = '';
@@ -456,19 +465,27 @@ WHERE b.oidbin = ?
 			my $f = "$d/$x/$n";
 			open my $fh, '<', $f or next;
 			# some (buggy) Maildir writers are non-atomic:
-			next unless -s $fh;
-			my $raw = read_all($fh, -s _);
-			if ($vrfy) {
-				my $sha = $HEXLEN2SHA{length($oidhex)};
-				my $got = git_sha($sha, \$raw)->hexdigest;
-				if ($got ne $oidhex) {
-					warn "$f changed $oidhex => $got\n";
-					next;
-				}
-			}
+			my $raw = read_all($fh, -s $fh // next);
+			next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
 			return \$raw;
 		}
 	}
+
+	$b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? /* AND f.loc LIKE 'mh:/%' */
+
+	$b2n->bind_param(1, $oidbin, SQL_BLOB);
+	$b2n->execute;
+	while (my ($d, $n) = $b2n->fetchrow_array) {
+		substr($d, 0, length('mh:')) = '';
+		my $f = "$d/$n";
+		open my $fh, '<', $f or next;
+		my $raw = read_all($fh, -s $fh // next);
+		next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
+		return \$raw;
+	}
 	undef;
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 071ba113..de75e99e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -400,6 +400,11 @@ sub new {
 				"$dst exists and is not a directory\n";
 		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
 		$lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+	} elsif ($fmt eq 'mh') {
+		-e $dst && !-d _ and die
+				"$dst exists and is not a directory\n";
+		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+		$lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
 	} elsif (substr($fmt, 0, 4) eq 'mbox') {
 		require PublicInbox::MboxReader;
 		$self->can("eml2$fmt") or die "bad mbox format: $fmt\n";
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
new file mode 100644
index 00000000..673e3e06
--- /dev/null
+++ b/lib/PublicInbox/MHreader.pm
@@ -0,0 +1,103 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# MH reader, based on Lib/mailbox.py in cpython source
+package PublicInbox::MHreader;
+use v5.12;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::OnDestroy;
+use PublicInbox::IO qw(try_cat);
+use PublicInbox::MdirSort;
+use Carp qw(carp);
+use autodie qw(chdir closedir opendir);
+
+my %FL2OFF = ( # mh_sequences key => our keyword
+	replied => 0,
+	flagged => 1,
+	unseen => 2, # negate
+);
+my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)
+
+sub new { # $dir: MH folder path; $cwdfh: handle given to chdir() afterwards
+	my ($cls, $dir, $cwdfh) = @_;
+	if (substr($dir, -1) ne '/') { # TODO: do this earlier
+		carp "W: appending `/' to `$dir' (fix caller)\n";
+		$dir .= '/';
+	}
+	bless { dir => $dir, cwdfh => $cwdfh }, $cls;
+}
+
+sub read_mh_sequences ($) { # caller must chdir($self->{dir})
+	my ($self) = @_;
+	my ($fl, $off, @n);
+	my @seq = ('', '', ''); # one bit vector per %FL2OFF offset
+	for (split /\n+/s, try_cat('.mh_sequences')) {
+		($fl, @n) = split /[: \t]+/;
+		$off = $FL2OFF{$fl} // do { warn <<EOM;
+W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
+EOM
+			next;
+		};
+		@n = grep /\A[0-9]+\z/s, @n; # don't stat, yet; `N-M' is dropped
+		if (@n) {
+			@n = sort { $b <=> $a } @n; # to avoid resize
+			my $buf = '';
+			vec($buf, $_, 1) = 1 for @n;
+			$seq[$off] = $buf; # replaces an earlier line with this key
+		}
+	}
+	\@seq;
+}
+
+sub mh_each_file { # calls $efcb->($dir, $n, $self, @arg) per numeric name
+	my ($self, $efcb, @arg) = @_;
+	opendir(my $dh, my $dir = $self->{dir});
+	my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+	chdir($dh); # names from readdir are used relative to the MH folder
+	if (defined(my $sort = $self->{sort})) {
+		my @sort = map {
+			my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
+			# sorting by name alphabetically makes no sense for MH:
+			for my $k (@tmp) { # bind to $k; bare s/// would hit map's $_
+				$k =~ s/\A(\-|\+|)(?:name|)\z/$1sequence/;
+			}
+			@tmp;
+		} @$sort;
+		my @n = grep /\A[0-9]+\z/s, readdir $dh;
+		mdir_sort \@n, \@sort; # reorders (and filters) @n in place
+		$efcb->($dir, $_, $self, @arg) for @n;
+	} else { # unsorted: stream entries in readdir order
+		while (readdir $dh) { # perl v5.12+ to set $_ on readdir
+			$efcb->($dir, $_, $self, @arg) if /\A[0-9]+\z/s;
+		}
+	}
+	closedir $dh; # may die
+}
+
+sub kw_for ($$) { # returns an arrayref of keywords for message number $n
+	my ($self, $n) = @_;
+	my $seq = $self->{mh_seq} //= read_mh_sequences($self); # cached in $self
+	my @kw = map { vec($seq->[$_], $n, 1) ? $OFF2KW[$_] : () } (0, 1);
+	vec($seq->[2], $n, 1) or push @kw, 'seen'; # not in `unseen' => seen
+	\@kw;
+}
+
+sub _file2eml { # mh_each_file cb
+	my ($dir, $n, $self, $ucb, @arg) = @_;
+	my $eml = eml_from_path($n); # bare $n: callers chdir into $dir first
+	$ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml; # else no call
+}
+
+sub mh_each_eml { # calls $ucb->($dir, $n, $kw, $eml, @arg) per loaded message
+	my ($self, $ucb, @arg) = @_;
+	mh_each_file($self, \&_file2eml, $ucb, @arg);
+}
+
+sub mh_read_one { # like mh_each_eml, but only for message number $n
+	my ($self, $n, $ucb, @arg) = @_;
+	my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+	chdir(my $dir = $self->{dir}); # _file2eml loads $n relative to the cwd
+	_file2eml($dir, $n, $self, $ucb, @arg);
+}
+
+1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index db5f4545..2981b058 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Maildirs for now, MH eventually
+# Maildirs only (PublicInbox::MHreader exists, now)
 # ref: https://cr.yp.to/proto/maildir.html
 #	https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::MdirReader;
diff --git a/lib/PublicInbox/MdirSort.pm b/lib/PublicInbox/MdirSort.pm
new file mode 100644
index 00000000..6bd9fb6c
--- /dev/null
+++ b/lib/PublicInbox/MdirSort.pm
@@ -0,0 +1,46 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# used for sorting MH (and (TODO) Maildir) names
+# TODO: consider sort(1) to parallelize sorting of gigantic directories
+package PublicInbox::MdirSort;
+use v5.12;
+use Time::HiRes ();
+use parent qw(Exporter);
+use Fcntl qw(S_ISREG);
+our @EXPORT = qw(mdir_sort);
+my %ST = (sequence => 0, size => 1, atime => 2, mtime => 3, ctime => 4);
+
+sub mdir_sort ($$;$) { # sorts @$ent in place by the keys listed in @$sort
+	my ($ent, $sort, $max) = @_;
+	my @st;
+	my @ent = map {
+		@st = Time::HiRes::stat $_; # empty list if stat fails (vanished)
+		# name, size, {a,m,c}time
+		@st && S_ISREG($st[2]) ? [ $_, @st[7..10] ] : ();
+	} @$ent;
+	@ent = grep { $_->[1] <= $max } @ent if $max; # optional size limit
+	use sort 'stable'; # ties keep the prior order, so the last key wins
+	for my $s (@$sort) {
+		if ($s =~ /\A(\-|\+|)name\z/) { # string comparison of names
+			if ($1 eq '-') {
+				@ent = sort { $b->[0] cmp $a->[0] } @ent;
+			} else {
+				@ent = sort { $a->[0] cmp $b->[0] } @ent;
+			}
+		} elsif ($s =~ /\A(\-|\+|)
+				(sequence|size|ctime|mtime|atime)\z/x) {
+			my $key = $ST{$2}; # column index; `sequence' is numeric name
+			if ($1 eq '-') {
+				@ent = sort { $b->[$key] <=> $a->[$key] } @ent;
+			} else {
+				@ent = sort { $a->[$key] <=> $b->[$key] } @ent;
+			}
+		} else {
+			die "E: unrecognized sort parameter: `$s'";
+		}
+	}
+	@$ent = map { $_->[0] } @ent; # write surviving names back to caller
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 22c50675..64fe09fa 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -24,6 +24,7 @@ BEGIN {
 	@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
 		run_script start_script key2sub xsys xsys_e xqx eml_load tick
 		have_xapian_compact json_utf8 setup_public_inboxes create_inbox
+		create_dir
 		create_coderepo require_bsd kernel_version check_broken_tmpfs
 		quit_waiter_pipe wait_for_eof require_git_http_backend
 		tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
@@ -843,26 +844,24 @@ sub my_sum {
 	substr PublicInbox::SHA::sha256_hex(join('', @l)), 0, 8;
 }
 
-sub create_coderepo ($$;@) {
-	my $ident = shift;
-	my $cb = pop;
+sub create_dir (@) {
+	my ($ident, $cb) = (shift, pop);
 	my %opt = @_;
 	require PublicInbox::Lock;
 	require PublicInbox::Import;
-	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
-	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
 	my $tmpdir = delete $opt{tmpdir};
-	my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
+	my $dir = "t/data-gen/$base.$ident-".my_sum($cb, \%opt);
 	require File::Path;
 	my $new = File::Path::make_path($dir);
 	my $lk = PublicInbox::Lock->new("$dir/creat.lock");
 	my $scope = $lk->lock_for_scope;
 	if (!-f "$dir/creat.stamp") {
-		opendir(my $dfh, '.');
+		opendir(my $cwd, '.');
 		chdir($dir);
 		local %ENV = (%ENV, %COMMIT_ENV);
 		$cb->($dir);
-		chdir($dfh);
+		chdir($cwd); # some $cb chdir around
 		open my $s, '>', "$dir/creat.stamp";
 	}
 	return $dir if !defined($tmpdir);
@@ -870,6 +869,13 @@ sub create_coderepo ($$;@) {
 	$tmpdir;
 }
 
+sub create_coderepo (@) { # create_dir with the branch name in the ident
+	my $ident = shift;
+	require PublicInbox::Import;
+	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
+	create_dir "$ident-$db", @_; # $db: text after the last `/' of the branch
+}
+
 sub create_inbox ($;@) {
 	my $ident = shift;
 	my $cb = pop;
diff --git a/t/mh_reader.t b/t/mh_reader.t
new file mode 100644
index 00000000..4bc77c1e
--- /dev/null
+++ b/t/mh_reader.t
@@ -0,0 +1,108 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use PublicInbox::TestCommon;
+require_ok 'PublicInbox::MHreader';
+use PublicInbox::IO qw(write_file);
+use PublicInbox::Lock;
+use PublicInbox::OnDestroy;
+use PublicInbox::Eml;
+use autodie;
+opendir my $cwdfh, '.';
+
+my $tmpdir = tmpdir;
+my $normal = create_dir 'normal', sub {
+	write_file '>', 3, "Subject: replied a\n\n";
+	write_file '>', 4, "Subject: replied b\n\n";
+	write_file '>', 1, "Subject: unseen\n\n";
+	write_file '>', 2, "Subject: unseen flagged\n\n";
+	write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+flagged: 2
+replied: 3 4
+EOM
+};
+
+my $for_sort = create_dir 'size', sub {
+	for (1..3) {
+		my $name = 10 - $_;
+		write_file '>', $name, "Subject: ".($_ x $_)."\n\n";
+	}
+};
+
+my $stale = create_dir 'stale', sub {
+	write_file '>', 4, "Subject: msg 4\n\n";
+	write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+EOM
+};
+
+{
+	my $mhr = PublicInbox::MHreader->new("$normal/", $cwdfh);
+	$mhr->{sort} = [ '' ];
+	my @res;
+	$mhr->mh_each_eml(sub { push @res, \@_; }, [ 'bogus' ]);
+	is scalar(@res), 4, 'got 4 messages' or diag explain(\@res);
+	is_deeply [map { $_->[1] } @res], [1, 2, 3, 4],
+		'got messages in expected order';
+	is scalar(grep { $_->[4]->[0] eq 'bogus' } @res), scalar(@res),
+		'cb arg passed to all messages' or diag explain(\@res);
+
+	$mhr = PublicInbox::MHreader->new("$stale/", $cwdfh);
+	@res = ();
+	$mhr->mh_each_eml(sub { push @res, \@_; });
+	is scalar(@res), 1, 'ignored stale messages';
+}
+
+test_lei(sub {
+	lei_ok qw(convert -f mboxrd), $normal;
+	my @msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+	my @eml = map { PublicInbox::Eml->new($_) } @msgs;
+	my $h = 'Subject';
+	@eml = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml;
+	my @has = map { scalar $_->header_raw($h) } @eml;
+	is_xdeeply \@has,
+		[ 'replied a', 'replied b', 'unseen', 'unseen flagged' ],
+		'subjects sorted';
+	$h = 'X-Status';
+	@has = map { scalar $_->header_raw($h) } @eml;
+	is_xdeeply \@has, [ 'A', 'A', undef, 'F' ], 'answered and flagged kw';
+	$h = 'Status';
+	@has = map { scalar $_->header_raw($h) } @eml;
+	is_xdeeply \@has, ['RO', 'RO', 'O', 'O'], 'read and old';
+	lei_ok qw(import +L:normal), $normal;
+	lei_ok qw(q L:normal -f mboxrd);
+	@msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+	my @eml2 = map { PublicInbox::Eml->new($_) } @msgs;
+	$h = 'Subject';
+	@eml2 = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml2;
+	is_xdeeply \@eml2, \@eml, 'import preserved kw';
+
+	lei_ok 'ls-mail-sync';
+	is $lei_out, 'mh:'.File::Spec->rel2abs($normal)."\n",
+		'mail sync stored';
+
+	lei_ok qw(convert -s size -f mboxrd), "mh:$for_sort";
+	chomp(my @s = grep /^Subject:/, split(/^/sm, $lei_out));
+	s/^Subject: // for @s;
+	is_xdeeply \@s, [ 1, 22, 333 ], 'sorted by size';
+
+	for my $s ([], [ 'name' ], [ 'sequence' ]) {
+		lei_ok qw(convert -f mboxrd), "mh:$for_sort", '-s', @$s;
+		chomp(@s = grep /^Subject:/, split(/^/sm, $lei_out));
+		s/^Subject: // for @s;
+		my $desc = "@$s" || '(default)';
+		is_xdeeply \@s, [ 333, 22, 1 ], "sorted by: $desc";
+	}
+
+	lei_ok qw(import +L:sorttest), "MH:$for_sort";
+	lei_ok 'ls-mail-sync', $for_sort;
+	is $lei_out, 'mh:'.File::Spec->rel2abs($for_sort)."\n",
+		"mail sync stored with `MH' normalized to `mh'";
+	lei_ok qw(index), 'mh:'.$stale;
+	lei qw(q -f mboxrd), 's:msg 4';
+	like $lei_out, qr/^Subject: msg 4\nStatus: RO\n\n\n/ms,
+		"message retrieved after `lei index'"
+});
+
+done_testing;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] lei: support reading MH for convert+import+index
  2023-12-16 13:09 [PATCH] lei: support reading MH for convert+import+index Eric Wong
@ 2023-12-16 16:15 ` Konstantin Ryabitsev
  2023-12-16 18:17   ` Eric Wong
  2023-12-29 18:05 ` [PATCH v2] " Eric Wong
  1 sibling, 1 reply; 5+ messages in thread
From: Konstantin Ryabitsev @ 2023-12-16 16:15 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

On Sat, Dec 16, 2023 at 01:09:32PM +0000, Eric Wong wrote:
> The MH format is widely-supported and used by various MUAs such
> as mutt and sylpheed, and a MH-like format is used by mlmmj for
> archives, as well.  Locking implementations for writes are
> inconsistent, so this commit doesn't support writes, yet.

Nice, so eventually we should be able to specify the following instead of
faking out a maildir?

watch=mh:/var/spool/mlmmj/list.name/archive

> inotify|EVFILT_VNODE watches aren't supported, yet, either.

In the case of mlmmj it's sufficient to watch the
/var/spool/mlmmj/list.name/index file for updates, but I don't know how well
this lends itself to other implementations (I am not at all familiar with MH).

-K

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lei: support reading MH for convert+import+index
  2023-12-16 16:15 ` Konstantin Ryabitsev
@ 2023-12-16 18:17   ` Eric Wong
  2023-12-17  7:59     ` Eric Wong
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Wong @ 2023-12-16 18:17 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> Nice, so eventually we should be able to specify the following instead of
> faking out a maildir?
> 
> watch=mh:/var/spool/mlmmj/list.name/archive

Yes, that's the plan.

> > inotify|EVFILT_VNODE watches aren't supported, yet, either.
> 
> In the case of mlmmj it's sufficient to watch the
> /var/spool/mlmmj/list.name/index file for updates, but I don't know how well
> this lends itself to other implementations (I am not at all familiar with MH).

Just watching the directory itself is sufficient (like Maildir)
and will report new files.  We just have to check /\A[0-9]+\z/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] lei: support reading MH for convert+import+index
  2023-12-16 18:17   ` Eric Wong
@ 2023-12-17  7:59     ` Eric Wong
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-12-17  7:59 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Eric Wong <e@80x24.org> wrote:
> Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> > Nice, so eventually we should be able to specify the following instead of
> > faking out a maildir?
> > 
> > watch=mh:/var/spool/mlmmj/list.name/archive
> 
> Yes, that's the plan.

Well, reading /usr/lib/python*/mailbox.py on my system makes me cry:

    def pack(self):
        """Re-name messages to eliminate numbering gaps. Invalidates keys."""

That's for the Python stdlib MH class where I was looking for a
non-racy write implementation.

And checking the nmh source[1] reveals packing happens there, too...

Packing makes sense for a memory-efficient representation of
.mh_sequences without resorting to a tree or hash table; but it
invalidates `lei index' and forces -watch to do a full rescan if
anybody uses pack.  Ugh...

Fortunately, this doesn't seem to be the default behavior of nmh
(`nopack' appears to be the default).

[1] https://git.savannah.gnu.org/git/nmh.git sbr/folder_pack.c

> > > inotify|EVFILT_VNODE watches aren't supported, yet, either.

At least lei should have a reasonably fast way to handle this
using mail_sync.sqlite3 to compare SHA-(1|256) without having
to decode MIME/QP/Base-64 to get comparisons... I suppose
-watch needs to start using that, too...

> > In the case of mlmmj it's sufficient to watch the
> > /var/spool/mlmmj/list.name/index file for updates, but I don't know how well
> > this lends itself to other implementations (I am not at all familiar with MH).
> 
> Just watching the directory itself is sufficient (like Maildir)
> and will report new files.  We just have to check /\A[0-9]+\z/

At least mlmmj won't pack because it's an archive (or at
least it shouldn't....)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2] lei: support reading MH for convert+import+index
  2023-12-16 13:09 [PATCH] lei: support reading MH for convert+import+index Eric Wong
  2023-12-16 16:15 ` Konstantin Ryabitsev
@ 2023-12-29 18:05 ` Eric Wong
  1 sibling, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-12-29 18:05 UTC (permalink / raw)
  To: meta

The MH format is widely-supported and used by various MUAs such
as mutt and sylpheed, and a MH-like format is used by mlmmj for
archives, as well.  Locking implementations for writes are
inconsistent, so this commit doesn't support writes, yet.

inotify|EVFILT_VNODE watches aren't supported, yet, but that'll
have to come since MH allows packing unused integers and
renaming files.
---
v2 fixes:
* uses Perl REGEXP match via DBD::SQLite for folder filtering
* unconditionally verify blob contents
* eliminate unused $tmpdir in test

diff -u b/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
--- b/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -471,19 +471,20 @@
 		}
 	}
 
+	# MH, except `uid' is not always unique (can be packed)
 	$b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.uid FROM blob2num b
 LEFT JOIN folders f ON b.fid = f.fid
-WHERE b.oidbin = ? /* AND f.loc LIKE 'mh:/%' */
+WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/'
 
 	$b2n->bind_param(1, $oidbin, SQL_BLOB);
 	$b2n->execute;
-	while (my ($d, $n) = $b2n->fetchrow_array) {
-		substr($d, 0, length('mh:')) = '';
-		my $f = "$d/$n";
+	while (my ($f, $n) = $b2n->fetchrow_array) {
+		$f =~ s/\Amh://s or die "BUG: not MH: $f";
+		$f .= "/$n";
 		open my $fh, '<', $f or next;
 		my $raw = read_all($fh, -s $fh // next);
-		next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
+		next if blob_mismatch $f, $oidhex, \$raw;
 		return \$raw;
 	}
 	undef;
diff -u b/t/mh_reader.t b/t/mh_reader.t
--- b/t/mh_reader.t
+++ b/t/mh_reader.t
@@ -10,7 +10,6 @@
 use autodie;
 opendir my $cwdfh, '.';
 
-my $tmpdir = tmpdir;
 my $normal = create_dir 'normal', sub {
 	write_file '>', 3, "Subject: replied a\n\n";
 	write_file '>', 4, "Subject: replied b\n\n";

 MANIFEST                       |   3 +
 lib/PublicInbox/LEI.pm         |  13 ++--
 lib/PublicInbox/LeiConvert.pm  |   5 ++
 lib/PublicInbox/LeiImport.pm   |  23 +++++++
 lib/PublicInbox/LeiImportKw.pm |   2 +-
 lib/PublicInbox/LeiIndex.pm    |   2 +-
 lib/PublicInbox/LeiInput.pm    |  52 +++++++++++++---
 lib/PublicInbox/LeiMailSync.pm |  40 ++++++++----
 lib/PublicInbox/LeiToMail.pm   |   5 ++
 lib/PublicInbox/MHreader.pm    | 103 +++++++++++++++++++++++++++++++
 lib/PublicInbox/MdirReader.pm  |   2 +-
 lib/PublicInbox/MdirSort.pm    |  46 ++++++++++++++
 lib/PublicInbox/TestCommon.pm  |  22 ++++---
 t/mh_reader.t                  | 107 +++++++++++++++++++++++++++++++++
 14 files changed, 392 insertions(+), 33 deletions(-)
 create mode 100644 lib/PublicInbox/MHreader.pm
 create mode 100644 lib/PublicInbox/MdirSort.pm
 create mode 100644 t/mh_reader.t

diff --git a/MANIFEST b/MANIFEST
index 109ce88a..051cd6f9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -296,6 +296,7 @@ lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
 lib/PublicInbox/MDA.pm
+lib/PublicInbox/MHreader.pm
 lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
 lib/PublicInbox/MailDiff.pm
@@ -305,6 +306,7 @@ lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MboxLock.pm
 lib/PublicInbox/MboxReader.pm
 lib/PublicInbox/MdirReader.pm
+lib/PublicInbox/MdirSort.pm
 lib/PublicInbox/MiscIdx.pm
 lib/PublicInbox/MiscSearch.pm
 lib/PublicInbox/MsgIter.pm
@@ -547,6 +549,7 @@ t/mda-mime.eml
 t/mda.t
 t/mda_filter_rubylang.t
 t/mdir_reader.t
+t/mh_reader.t
 t/mid.t
 t/mime.t
 t/miscsearch.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 17431518..e0cfd55a 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -267,7 +267,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 	'one-time import/update from URL or filesystem',
 	qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
 	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
-	commit-delay=i),
+	commit-delay=i sort|s:s@),
 	@net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
 	'forget sync information for a mail folder', @c_opt ],
@@ -280,7 +280,7 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 'convert' => [ 'LOCATION...|--stdin',
 	'one-time conversion from URL or filesystem to another format',
 	qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!
-		rsyncable),
+		rsyncable sort|s:s@),
 	@net_opt, @c_opt ],
 'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
 	"use a patch to generate a query for `lei q --stdin'",
@@ -321,6 +321,9 @@ import => [ 'LOCATION...|--stdin [LABELS...]',
 my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo',
 			'specify message input format' ];
 my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];
+my $sort_out = [ 'VAL|received|relevance|docid',
+		"order of results is `--output'-dependent"];
+my $sort_in = [ 'sequence|mtime|size', 'sort input (format-dependent)' ];
 
 # we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
 my %OPTDESC = (
@@ -428,8 +431,10 @@ my %OPTDESC = (
 'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
 'offset=i' => ['OFF', 'search result offset (default: 0)'],
 
-'sort|s=s' => [ 'VAL|received|relevance|docid',
-		"order of results is `--output'-dependent"],
+'sort|s=s	q' => $sort_out,
+'sort|s=s	lcat' => $sort_out,
+'sort|s:s@	convert' => $sort_in,
+'sort|s:s@	import' => $sort_in,
 'reverse|r' => 'reverse search results', # like sort(1)
 
 'boost=i' => 'increase/decrease priority of results (default: 0)',
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8f628562..17a952f2 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -28,6 +28,11 @@ sub input_maildir_cb {
 	$self->{wcb}->(undef, { kw => $kw }, $eml);
 }
 
+sub input_mh_cb {
+	my ($dn, $bn, $kw, $eml, $self) = @_;
+	$self->{wcb}->(undef, { kw => $kw }, $eml);
+}
+
 sub process_inputs { # via wq_do
 	my ($self) = @_;
 	local $PublicInbox::DS::in_loop = 0; # force synchronous awaitpid
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index c2552bf0..5521188c 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -53,6 +53,29 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
 	}
 }
 
+sub input_mh_cb {
+	my ($mhdir, $n, $kw, $eml, $self) = @_;
+	substr($mhdir, 0, 0) = 'mh:'; # add prefix
+	my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
+	my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
+	my @oidbin = $lms ? $lms->num_oidbin($mhdir, $n) : ();
+	@oidbin > 1 and warn("W: $mhdir/$n not unique:\n",
+				map { "\t".unpack('H*', $_)."\n" } @oidbin);
+	my @docids = sort { $a <=> $b } uniqstr
+			map { $lse->over->oidbin_exists($_) } @oidbin;
+	if (scalar @docids) {
+		$lse->kw_changed(undef, $kw, \@docids) or return;
+	}
+	if (defined $eml) {
+		my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
+		$vmd->{sync_info} = [ $mhdir, $n + 0 ] if $self->{-mail_sync};
+		$self->input_eml_cb($eml, $vmd);
+	}
+	# TODO:
+	# elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
+	#	$ikw->wq_io_do('ck_update_kw', [], "mh:$dir", $uid, $kw);
+}
+
 sub input_net_cb { # imap_each / nntp_each
 	my ($uri, $uid, $kw, $eml, $self) = @_;
 	if (defined $eml) {
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
index 4b8e69fb..765e23cd 100644
--- a/lib/PublicInbox/LeiImportKw.pm
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -36,7 +36,7 @@ sub ipc_atfork_child {
 sub ck_update_kw { # via wq_io_do
 	my ($self, $url, $uid, $kw) = @_;
 	my @oidbin = $self->{-lms_rw}->num_oidbin($url, $uid);
-	my $uid_url = "$url/;UID=$uid";
+	my $uid_url = index($url, 'mh:') == 0 ? $url.$uid : "$url/;UID=$uid";
 	@oidbin > 1 and warn("W: $uid_url not unique:\n",
 				map { "\t".unpack('H*', $_)."\n" } @oidbin);
 	my @docids = sort { $a <=> $b } uniqstr
diff --git a/lib/PublicInbox/LeiIndex.pm b/lib/PublicInbox/LeiIndex.pm
index b3f3e1a0..0e329e58 100644
--- a/lib/PublicInbox/LeiIndex.pm
+++ b/lib/PublicInbox/LeiIndex.pm
@@ -35,7 +35,7 @@ sub lei_index {
 
 no warnings 'once';
 no strict 'refs';
-for my $m (qw(pmdir_cb input_net_cb)) {
+for my $m (qw(pmdir_cb input_net_cb input_mh_cb)) {
 	*$m = PublicInbox::LeiImport->can($m);
 }
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index daba9a8e..947a7a79 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -69,6 +69,11 @@ sub input_maildir_cb {
 	$self->input_eml_cb($eml);
 }
 
+sub input_mh_cb {
+	my ($dn, $n, $kw, $eml, $self) = @_;
+	$self->input_eml_cb($eml);
+}
+
 sub input_net_cb { # imap_each, nntp_each cb
 	my ($url, $uid, $kw, $eml, $self) = @_;
 	$self->input_eml_cb($eml);
@@ -190,7 +195,7 @@ sub input_path_url {
 		$ifmt = lc($1);
 	} elsif ($input =~ /\.(?:patch|eml)\z/i) {
 		$ifmt = 'eml';
-	} elsif (-f $input && $input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z}) {
+	} elsif ($input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z} && -f $input) {
 		my $bn = $1;
 		my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
 		return if index($fl, 'T') >= 0;
@@ -204,6 +209,10 @@ sub input_path_url {
 	my $devfd = $lei->path_to_fd($input) // return;
 	if ($devfd >= 0) {
 		$self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
+	} elsif ($devfd < 0 && $input =~ m{\A(.+/)([0-9]+)\z} && -f $input) {
+		my ($dn, $n) = ($1, $2);
+		my $mhr = PublicInbox::MHreader->new($dn, $lei->{3});
+		$mhr->mh_read_one($n, $self->can('input_mh_cb'), $self);
 	} elsif (-f $input && $ifmt eq 'eml') {
 		open my $fh, '<', $input or
 					return $lei->fail("open($input): $!");
@@ -231,6 +240,10 @@ sub input_path_url {
 						$self->can('input_maildir_cb'),
 						$self, @args);
 		}
+	} elsif (-d _ && $ifmt eq 'mh') {
+		my $mhr = PublicInbox::MHreader->new($input.'/', $lei->{3});
+		$mhr->{sort} = $lei->{opt}->{sort};
+		$mhr->mh_each_eml($self->can('input_mh_cb'), $self, @args);
 	} elsif (-d _ && $ifmt =~ /\A(?:v1|v2)\z/) {
 		my $ibx = PublicInbox::Inbox->new({inboxdir => $input});
 		each_ibx_eml($self, $ibx, @args);
@@ -354,13 +367,15 @@ sub prepare_inputs { # returns undef on error
 				PublicInbox::MboxReader->reads($ifmt) or return
 					$lei->fail("$ifmt not supported");
 			} elsif (-d $input_path) { # TODO extindex
-				$ifmt =~ /\A(?:maildir|v1|v2|extindex)\z/ or
+				$ifmt =~ /\A(?:maildir|mh|v1|v2|extindex)\z/ or
 					return$lei->fail("$ifmt not supported");
 				$input = $input_path;
 				add_dir $lei, $istate, $ifmt, \$input;
-			} elsif ($self->{missing_ok} && !-e _) {
+			} elsif ($self->{missing_ok} &&
+					$ifmt =~ /\A(?:maildir|mh)\z/ &&
+					!-e $input_path) {
 				# for "lei rm-watch" on missing Maildir
-				$may_sync and $input = 'maildir:'.
+				$may_sync and $input = "$ifmt:".
 						$lei->abs_path($input_path);
 			} else {
 				my $m = "Unable to handle $input";
@@ -373,7 +388,7 @@ sub prepare_inputs { # returns undef on error
 $input is `eml', not --in-format=$in_fmt
 
 			push @{$sync->{no}}, $input if $sync;
-		} elsif (-f $input && $input =~ m{\A(.+)/(new|cur)/([^/]+)\z}) {
+		} elsif ($input =~ m{\A(.+)/(new|cur)/([^/]+)\z} && -f $input) {
 			# single file in a Maildir
 			my ($mdir, $nc, $bn) = ($1, $2, $3);
 			my $other = $mdir . ($nc eq 'new' ? '/cur' : '/new');
@@ -385,12 +400,24 @@ $input is `eml', not --in-format=$in_fmt
 
 			if ($sync) {
 				$input = $lei->abs_path($mdir) . "/$nc/$bn";
-				push @{$sync->{ok}}, $input if $sync;
+				push @{$sync->{ok}}, $input;
 			}
 			require PublicInbox::MdirReader;
 		} else {
 			my $devfd = $lei->path_to_fd($input) // return;
-			if ($devfd >= 0 || -f $input || -p _) {
+			if ($devfd < 0 && $input =~ m{\A(.+)/([0-9]+)\z} &&
+					-f $input) { # single file in MH dir
+				my ($mh, $n) = ($1, $2);
+				lc($in_fmt//'eml') eq 'eml' or
+						return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+				if ($sync) {
+					$input = $lei->abs_path($mh)."/$n";
+					push @{$sync->{ok}}, $input;
+				}
+				require PublicInbox::MHreader;
+			} elsif ($devfd >= 0 || -f $input || -p _) {
 				push @{$sync->{no}}, $input if $sync;
 				push @f, $input;
 			} elsif (-d "$input/new" && -d "$input/cur") {
@@ -401,10 +428,13 @@ $input is `eml', not --in-format=$in_fmt
 				add_dir $lei, $istate, 'v1', \$input;
 			} elsif (-e "$input/ei.lock") {
 				add_dir $lei, $istate, 'extindex', \$input;
+			} elsif (-f "$input/.mh_sequences") {
+				add_dir $lei, $istate, 'mh', \$input;
 			} elsif ($self->{missing_ok} && !-e $input) {
 				if ($lei->{cmd} eq 'p2q') {
 					# will run "git format-patch"
 				} elsif ($may_sync) { # for lei rm-watch
+					# FIXME: support MH, here
 					$input = 'maildir:'.
 						$lei->abs_path($input);
 				}
@@ -446,6 +476,14 @@ $input is `eml', not --in-format=$in_fmt
 			$lei->refresh_watches;
 		}
 	}
+	if (my $mh = $istate->{mh}) {
+		require PublicInbox::MHreader;
+		grep(!m!\Amh:!i, @$mh) and die "BUG: @$mh (no pfx)";
+		if ($may_sync && $lei->{sto}) {
+			$lei->lms(1)->lms_write_prepare->add_folders(@$mh);
+			# $lei->refresh_watches; TODO
+		}
+	}
 	require PublicInbox::ExtSearch if $istate->{extindex};
 	$self->{inputs} = $inputs;
 }
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 17254a82..593715dc 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -435,15 +435,24 @@ sub folders {
 	map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
+sub blob_mismatch ($$$) {
+	my ($f, $oidhex, $rawref) = @_;
+	my $sha = $HEXLEN2SHA{length($oidhex)};
+	my $got = git_sha($sha, $rawref)->hexdigest;
+	$got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
+}
+
 sub local_blob {
 	my ($self, $oidhex, $vrfy) = @_;
 	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $oidbin = pack('H*', $oidhex);
+
 	my $b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-	$b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+	$b2n->bind_param(1, $oidbin, SQL_BLOB);
 	$b2n->execute;
 	while (my ($d, $n) = $b2n->fetchrow_array) {
 		substr($d, 0, length('maildir:')) = '';
@@ -456,19 +465,28 @@ WHERE b.oidbin = ?
 			my $f = "$d/$x/$n";
 			open my $fh, '<', $f or next;
 			# some (buggy) Maildir writers are non-atomic:
-			next unless -s $fh;
-			my $raw = read_all($fh, -s _);
-			if ($vrfy) {
-				my $sha = $HEXLEN2SHA{length($oidhex)};
-				my $got = git_sha($sha, \$raw)->hexdigest;
-				if ($got ne $oidhex) {
-					warn "$f changed $oidhex => $got\n";
-					next;
-				}
-			}
+			my $raw = read_all($fh, -s $fh // next);
+			next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
 			return \$raw;
 		}
 	}
+
+	# MH, except `uid' is not always unique (can be packed)
+	$b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/'
+
+	$b2n->bind_param(1, $oidbin, SQL_BLOB);
+	$b2n->execute;
+	while (my ($f, $n) = $b2n->fetchrow_array) {
+		$f =~ s/\Amh://s or die "BUG: not MH: $f";
+		$f .= "/$n";
+		open my $fh, '<', $f or next;
+		my $raw = read_all($fh, -s $fh // next);
+		next if blob_mismatch $f, $oidhex, \$raw;
+		return \$raw;
+	}
 	undef;
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 071ba113..de75e99e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -400,6 +400,11 @@ sub new {
 				"$dst exists and is not a directory\n";
 		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
 		$lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
+	} elsif ($fmt eq 'mh') {
+		-e $dst && !-d _ and die
+				"$dst exists and is not a directory\n";
+		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
+		$lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q';
 	} elsif (substr($fmt, 0, 4) eq 'mbox') {
 		require PublicInbox::MboxReader;
 		$self->can("eml2$fmt") or die "bad mbox format: $fmt\n";
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
new file mode 100644
index 00000000..673e3e06
--- /dev/null
+++ b/lib/PublicInbox/MHreader.pm
@@ -0,0 +1,103 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# MH reader, based on Lib/mailbox.py in cpython source
+package PublicInbox::MHreader;
+use v5.12;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::OnDestroy;
+use PublicInbox::IO qw(try_cat);
+use PublicInbox::MdirSort;
+use Carp qw(carp);
+use autodie qw(chdir closedir opendir);
+
+my %FL2OFF = ( # mh_sequences key => our keyword
+	replied => 0,
+	flagged => 1,
+	unseen => 2, # negate
+);
+my @OFF2KW = qw(answered flagged); # [2] => unseen (negated)
+
+sub new {
+	my ($cls, $dir, $cwdfh) = @_;
+	if (substr($dir, -1) ne '/') { # TODO: do this earlier
+		carp "W: appending `/' to `$dir' (fix caller)\n";
+		$dir .= '/';
+	}
+	bless { dir => $dir, cwdfh => $cwdfh }, $cls;
+}
+
+sub read_mh_sequences ($) { # caller must chdir($self->{dir})
+	my ($self) = @_;
+	my ($fl, $off, @n);
+	my @seq = ('', '', '');
+	for (split /\n+/s, try_cat('.mh_sequences')) {
+		($fl, @n) = split /[: \t]+/;
+		$off = $FL2OFF{$fl} // do { warn <<EOM;
+W: unknown `$fl' in $self->{dir}.mh_sequences (ignoring)
+EOM
+			next;
+		};
+		@n = grep /\A[0-9]+\z/s, @n; # don't stat, yet
+		if (@n) {
+			@n = sort { $b <=> $a } @n; # to avoid resize
+			my $buf = '';
+			vec($buf, $_, 1) = 1 for @n;
+			$seq[$off] = $buf;
+		}
+	}
+	\@seq;
+}
+
+sub mh_each_file {
+	my ($self, $efcb, @arg) = @_;
+	opendir(my $dh, my $dir = $self->{dir});
+	my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+	chdir($dh);
+	if (defined(my $sort = $self->{sort})) {
+		my @sort = map {
+			my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
+			# sorting by name alphabetically makes no sense for MH:
+			for my $k (@tmp) {
+				s/\A(\-|\+|)(?:name|)\z/$1sequence/;
+			}
+			@tmp;
+		} @$sort;
+		my @n = grep /\A[0-9]+\z/s, readdir $dh;
+		mdir_sort \@n, \@sort;
+		$efcb->($dir, $_, $self, @arg) for @n;
+	} else {
+		while (readdir $dh) { # perl v5.12+ to set $_ on readdir
+			$efcb->($dir, $_, $self, @arg) if /\A[0-9]+\z/s;
+		}
+	}
+	closedir $dh; # may die
+}
+
+sub kw_for ($$) {
+	my ($self, $n) = @_;
+	my $seq = $self->{mh_seq} //= read_mh_sequences($self);
+	my @kw = map { vec($seq->[$_], $n, 1) ? $OFF2KW[$_] : () } (0, 1);
+	vec($seq->[2], $n, 1) or push @kw, 'seen';
+	\@kw;
+}
+
+sub _file2eml { # mh_each_file cb
+	my ($dir, $n, $self, $ucb, @arg) = @_;
+	my $eml = eml_from_path($n);
+	$ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml;
+}
+
+sub mh_each_eml {
+	my ($self, $ucb, @arg) = @_;
+	mh_each_file($self, \&_file2eml, $ucb, @arg);
+}
+
+sub mh_read_one {
+	my ($self, $n, $ucb, @arg) = @_;
+	my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
+	chdir(my $dir = $self->{dir});
+	_file2eml($dir, $n, $self, $ucb, @arg);
+}
+
+1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index db5f4545..2981b058 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -1,7 +1,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# Maildirs for now, MH eventually
+# Maildirs only (PublicInbox::MHreader exists, now)
 # ref: https://cr.yp.to/proto/maildir.html
 #	https://wiki2.dovecot.org/MailboxFormat/Maildir
 package PublicInbox::MdirReader;
diff --git a/lib/PublicInbox/MdirSort.pm b/lib/PublicInbox/MdirSort.pm
new file mode 100644
index 00000000..6bd9fb6c
--- /dev/null
+++ b/lib/PublicInbox/MdirSort.pm
@@ -0,0 +1,46 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# used for sorting MH (and (TODO) Maildir) names
+# TODO: consider sort(1) to parallelize sorting of gigantic directories
+package PublicInbox::MdirSort;
+use v5.12;
+use Time::HiRes ();
+use parent qw(Exporter);
+use Fcntl qw(S_ISREG);
+our @EXPORT = qw(mdir_sort);
+my %ST = (sequence => 0, size => 1, atime => 2, mtime => 3, ctime => 4);
+
+sub mdir_sort ($$;$) {
+	my ($ent, $sort, $max) = @_;
+	my @st;
+	my @ent = map {
+		@st = Time::HiRes::stat $_;
+		# name, size, {a,m,c}time
+		S_ISREG($st[2]) ? [ $_, @st[7..10] ] : ();
+	} @$ent;
+	@ent = grep { $_->[1] <= $max } @ent if $max;
+	use sort 'stable';
+	for my $s (@$sort) {
+		if ($s =~ /\A(\-|\+|)name\z/) {
+			if ($1 eq '-') {
+				@ent = sort { $b->[0] cmp $a->[0] } @ent;
+			} else {
+				@ent = sort { $a->[0] cmp $b->[0] } @ent;
+			}
+		} elsif ($s =~ /\A(\-|\+|)
+				(sequence|size|ctime|mtime|atime)\z/x) {
+			my $key = $ST{$2};
+			if ($1 eq '-') {
+				@ent = sort { $b->[$key] <=> $a->[$key] } @ent;
+			} else {
+				@ent = sort { $a->[$key] <=> $b->[$key] } @ent;
+			}
+		} else {
+			die "E: unrecognized sort parameter: `$s'";
+		}
+	}
+	@$ent = map { $_->[0] } @ent;
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index b0f28e16..d20bff28 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -24,6 +24,7 @@ BEGIN {
 	@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
 		run_script start_script key2sub xsys xsys_e xqx eml_load tick
 		have_xapian_compact json_utf8 setup_public_inboxes create_inbox
+		create_dir
 		create_coderepo require_bsd kernel_version check_broken_tmpfs
 		quit_waiter_pipe wait_for_eof require_git_http_backend
 		tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
@@ -843,26 +844,24 @@ sub my_sum {
 	substr PublicInbox::SHA::sha256_hex(join('', @l)), 0, 8;
 }
 
-sub create_coderepo ($$;@) {
-	my $ident = shift;
-	my $cb = pop;
+sub create_dir (@) {
+	my ($ident, $cb) = (shift, pop);
 	my %opt = @_;
 	require PublicInbox::Lock;
 	require PublicInbox::Import;
-	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
-	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
 	my $tmpdir = delete $opt{tmpdir};
-	my $dir = "t/data-gen/$base.$ident-".my_sum($db, $cb, \%opt);
+	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
+	my $dir = "t/data-gen/$base.$ident-".my_sum($cb, \%opt);
 	require File::Path;
 	my $new = File::Path::make_path($dir);
 	my $lk = PublicInbox::Lock->new("$dir/creat.lock");
 	my $scope = $lk->lock_for_scope;
 	if (!-f "$dir/creat.stamp") {
-		opendir(my $dfh, '.');
+		opendir(my $cwd, '.');
 		chdir($dir);
 		local %ENV = (%ENV, %COMMIT_ENV);
 		$cb->($dir);
-		chdir($dfh);
+		chdir($cwd); # some $cb chdir around
 		open my $s, '>', "$dir/creat.stamp";
 	}
 	return $dir if !defined($tmpdir);
@@ -870,6 +869,13 @@ sub create_coderepo ($$;@) {
 	$tmpdir;
 }
 
+sub create_coderepo (@) {
+	my $ident = shift;
+	require PublicInbox::Import;
+	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
+	create_dir "$ident-$db", @_;
+}
+
 sub create_inbox ($;@) {
 	my $ident = shift;
 	my $cb = pop;
diff --git a/t/mh_reader.t b/t/mh_reader.t
new file mode 100644
index 00000000..e8f69fa8
--- /dev/null
+++ b/t/mh_reader.t
@@ -0,0 +1,107 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use PublicInbox::TestCommon;
+require_ok 'PublicInbox::MHreader';
+use PublicInbox::IO qw(write_file);
+use PublicInbox::Lock;
+use PublicInbox::OnDestroy;
+use PublicInbox::Eml;
+use autodie;
+opendir my $cwdfh, '.';
+
+my $normal = create_dir 'normal', sub {
+	write_file '>', 3, "Subject: replied a\n\n";
+	write_file '>', 4, "Subject: replied b\n\n";
+	write_file '>', 1, "Subject: unseen\n\n";
+	write_file '>', 2, "Subject: unseen flagged\n\n";
+	write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+flagged: 2
+replied: 3 4
+EOM
+};
+
+my $for_sort = create_dir 'size', sub {
+	for (1..3) {
+		my $name = 10 - $_;
+		write_file '>', $name, "Subject: ".($_ x $_)."\n\n";
+	}
+};
+
+my $stale = create_dir 'stale', sub {
+	write_file '>', 4, "Subject: msg 4\n\n";
+	write_file '>', '.mh_sequences', <<EOM;
+unseen: 1 2
+EOM
+};
+
+{
+	my $mhr = PublicInbox::MHreader->new("$normal/", $cwdfh);
+	$mhr->{sort} = [ '' ];
+	my @res;
+	$mhr->mh_each_eml(sub { push @res, \@_; }, [ 'bogus' ]);
+	is scalar(@res), 4, 'got 4 messages' or diag explain(\@res);
+	is_deeply [map { $_->[1] } @res], [1, 2, 3, 4],
+		'got messages in expected order';
+	is scalar(grep { $_->[4]->[0] eq 'bogus' } @res), scalar(@res),
+		'cb arg passed to all messages' or diag explain(\@res);
+
+	$mhr = PublicInbox::MHreader->new("$stale/", $cwdfh);
+	@res = ();
+	$mhr->mh_each_eml(sub { push @res, \@_; });
+	is scalar(@res), 1, 'ignored stale messages';
+}
+
+test_lei(sub {
+	lei_ok qw(convert -f mboxrd), $normal;
+	my @msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+	my @eml = map { PublicInbox::Eml->new($_) } @msgs;
+	my $h = 'Subject';
+	@eml = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml;
+	my @has = map { scalar $_->header_raw($h) } @eml;
+	is_xdeeply \@has,
+		[ 'replied a', 'replied b', 'unseen', 'unseen flagged' ],
+		'subjects sorted';
+	$h = 'X-Status';
+	@has = map { scalar $_->header_raw($h) } @eml;
+	is_xdeeply \@has, [ 'A', 'A', undef, 'F' ], 'answered and flagged kw';
+	$h = 'Status';
+	@has = map { scalar $_->header_raw($h) } @eml;
+	is_xdeeply \@has, ['RO', 'RO', 'O', 'O'], 'read and old';
+	lei_ok qw(import +L:normal), $normal;
+	lei_ok qw(q L:normal -f mboxrd);
+	@msgs = grep /\S/s, split /^From .[^\n]+\n/sm, $lei_out;
+	my @eml2 = map { PublicInbox::Eml->new($_) } @msgs;
+	$h = 'Subject';
+	@eml2 = sort { $a->header_raw($h) cmp $b->header_raw($h) } @eml2;
+	is_xdeeply \@eml2, \@eml, 'import preserved kw';
+
+	lei_ok 'ls-mail-sync';
+	is $lei_out, 'mh:'.File::Spec->rel2abs($normal)."\n",
+		'mail sync stored';
+
+	lei_ok qw(convert -s size -f mboxrd), "mh:$for_sort";
+	chomp(my @s = grep /^Subject:/, split(/^/sm, $lei_out));
+	s/^Subject: // for @s;
+	is_xdeeply \@s, [ 1, 22, 333 ], 'sorted by size';
+
+	for my $s ([], [ 'name' ], [ 'sequence' ]) {
+		lei_ok qw(convert -f mboxrd), "mh:$for_sort", '-s', @$s;
+		chomp(@s = grep /^Subject:/, split(/^/sm, $lei_out));
+		s/^Subject: // for @s;
+		my $desc = "@$s" || '(default)';
+		is_xdeeply \@s, [ 333, 22, 1 ], "sorted by: $desc";
+	}
+
+	lei_ok qw(import +L:sorttest), "MH:$for_sort";
+	lei_ok 'ls-mail-sync', $for_sort;
+	is $lei_out, 'mh:'.File::Spec->rel2abs($for_sort)."\n",
+		"mail sync stored with `MH' normalized to `mh'";
+	lei_ok qw(index), 'mh:'.$stale;
+	lei qw(q -f mboxrd), 's:msg 4';
+	like $lei_out, qr/^Subject: msg 4\nStatus: RO\n\n\n/ms,
+		"message retrieved after `lei index'"
+});
+
+done_testing;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-12-29 18:05 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-16 13:09 [PATCH] lei: support reading MH for convert+import+index Eric Wong
2023-12-16 16:15 ` Konstantin Ryabitsev
2023-12-16 18:17   ` Eric Wong
2023-12-17  7:59     ` Eric Wong
2023-12-29 18:05 ` [PATCH v2] " Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).