From 68fea5b055787c65f0e7164cbd5463f140382ea9 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Thu, 17 Dec 2020 09:20:29 +0000
Subject: lei_store: keyword extraction from mbox and Maildir

Dovecot, mutt, and likely much other software support mbox
Status/X-Status headers. Ensure we have a way to extract
these headers as JMAP-compatible keywords before removing
them for git storage.

->add_eml now accepts setting keywords at import time, and
will probably be called like this:

	$lst->add_eml($eml, $lst->mbox_keywords($eml));
	$lst->add_eml($eml, $lst->maildir_keywords($fn));
---
 lib/PublicInbox/LeiStore.pm | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'lib/PublicInbox/LeiStore.pm')

diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index c95df785..553adbc8 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -162,8 +162,27 @@ sub remove_eml_keywords {
 	\@docids;
 }
 
+# cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/
+my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft');
+# O (old/non-recent), and D (deleted) aren't in JMAP,
+# so probably won't be supported by us.
+sub mbox_keywords {
+	my $eml = $_[-1];
+	my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}";
+	my %kw;
+	$s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge;
+	sort(keys %kw);
+}
+
+# cf: https://cr.yp.to/proto/maildir.html
+my %c2kw = ('D' => 'draft', F => 'flagged', R => 'answered', S => 'seen');
+sub maildir_keywords {
+	$_[-1] =~ /:2,([A-Z]+)\z/i ?
+		sort(map { $c2kw{$_} // () } split(//, $1)) : ();
+}
+
 sub add_eml {
-	my ($self, $eml) = @_;
+	my ($self, $eml, @kw) = @_;
 	my $eidx = eidx_init($self);
 	my $oidx = $eidx->{oidx};
 	my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg';
@@ -178,6 +197,7 @@ sub add_eml {
 			my $idx = $eidx->idx_shard($docid);
 			$oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
 			$idx->shard_add_eidx_info($docid, '.', $eml); # List-Id
+			$idx->shard_add_keywords($docid, @kw) if @kw;
 		}
 	} else {
 		$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
@@ -185,6 +205,7 @@ sub add_eml {
 		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
 		my $idx = $eidx->idx_shard($smsg->{num});
 		$idx->index_raw($msgref, $eml, $smsg);
+		$idx->shard_add_keywords($smsg->{num}, @kw) if @kw;
 	}
 	$smsg->{blob}
 }
-- 
cgit v1.2.3-24-ge0c7