about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- Documentation/public-inbox-glossary.pod 13
-rw-r--r-- lib/PublicInbox/LeiImport.pm 6
-rw-r--r-- lib/PublicInbox/LeiStore.pm 30
-rw-r--r-- lib/PublicInbox/LeiToMail.pm 2
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 65
-rw-r--r-- t/lei_store.t 28
6 files changed, 85 insertions, 59 deletions
diff --git a/Documentation/public-inbox-glossary.pod b/Documentation/public-inbox-glossary.pod
index 61e1e9f8..10b3f9d6 100644
--- a/Documentation/public-inbox-glossary.pod
+++ b/Documentation/public-inbox-glossary.pod
@@ -69,8 +69,8 @@ L<public-inbox-nntpd(1)> or L<public-inbox-imapd(1)>
 
 Private, per-message keywords or flags as described in RFC 8621
 section 10.4.  These are conveyed in the C<Status:> and
-C<X-Status:> headers for L<mbox(5)>, as IMAP FLAGS (RFC 3501 section 2.3.2),
-or Maildir info flags.
+C<X-Status:> headers for L<mbox(5)>, as system IMAP FLAGS
+(RFC 3501 section 2.3.2), or Maildir info flags.
 
 L<public-inbox-watch(1)> ignores drafts and trashed (deleted)
 messages.  L<lei-import(1)> ignores trashed (deleted) messages,
@@ -83,6 +83,15 @@ the same email into one or more virtual folders for
 ease-of-filtering.  This is NOT tied to public-inbox names, as
 messages stored by lei may not be public.
 
+These are similar in spirit to arbitrary freeform "tags"
+in mail software such as L<notmuch(1)> and non-system IMAP FLAGS.
+
+=item volatile metadata (VMD)
+
+For L<lei(1)> users only, this refers to the combination of
+keywords and labels which are subject to frequent change
+independently of immutable message content.
+
 =item IMAP INTERNALDATE, JMAP receivedAt, rt: search prefix
 
 The first valid timestamp value of Received: headers (top first).
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 65e37371..137c22fc 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,7 +12,7 @@ use PublicInbox::PktOp qw(pkt_do);
 sub _import_eml { # MboxReader callback
         my ($eml, $sto, $set_kw) = @_;
         $sto->ipc_do('set_eml', $eml, $set_kw ?
-                @{PublicInbox::MboxReader::mbox_keywords($eml)} : ());
+                { kw => PublicInbox::MboxReader::mbox_keywords($eml) } : ());
 }
 
 sub import_done_wait { # dwaitpid callback
@@ -150,12 +150,12 @@ error reading $input: $!
 
 sub _import_maildir { # maildir_each_eml cb
         my ($f, $kw, $eml, $sto, $set_kw) = @_;
-        $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+        $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ());
 }
 
 sub _import_net { # imap_each, nntp_each cb
         my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_;
-        $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+        $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw } : ());
 }
 
 sub import_path_url {
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 771443db..ae263914 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -129,38 +129,38 @@ sub _docids_for ($$) {
         sort { $a <=> $b } values %docids;
 }
 
-sub set_eml_keywords {
-        my ($self, $eml, @kw) = @_;
+sub set_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
         my $eidx = eidx_init($self);
         my @docids = _docids_for($self, $eml);
         for my $docid (@docids) {
-                $eidx->idx_shard($docid)->ipc_do('set_keywords', $docid, @kw);
+                $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
         }
         \@docids;
 }
 
-sub add_eml_keywords {
-        my ($self, $eml, @kw) = @_;
+sub add_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
         my $eidx = eidx_init($self);
         my @docids = _docids_for($self, $eml);
         for my $docid (@docids) {
-                $eidx->idx_shard($docid)->ipc_do('add_keywords', $docid, @kw);
+                $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd);
         }
         \@docids;
 }
 
-sub remove_eml_keywords {
-        my ($self, $eml, @kw) = @_;
+sub remove_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
         my $eidx = eidx_init($self);
         my @docids = _docids_for($self, $eml);
         for my $docid (@docids) {
-                $eidx->idx_shard($docid)->ipc_do('remove_keywords', $docid, @kw)
+                $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd);
         }
         \@docids;
 }
 
 sub add_eml {
-        my ($self, $eml, @kw) = @_;
+        my ($self, $eml, $vmd) = @_;
         my $im = $self->importer; # may create new epoch
         my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates
         my $oidx = $eidx->{oidx};
@@ -174,7 +174,7 @@ sub add_eml {
                         $oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
                         # add_eidx_info for List-Id
                         $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
-                        $idx->ipc_do('add_keywords', $docid, @kw) if @kw;
+                        $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
                 }
                 \@docids;
         } else {
@@ -183,14 +183,14 @@ sub add_eml {
                 $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
                 my $idx = $eidx->idx_shard($smsg->{num});
                 $idx->index_eml($eml, $smsg);
-                $idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
+                $idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd;
                 $smsg;
         }
 }
 
 sub set_eml {
-        my ($self, $eml, @kw) = @_;
-        add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw);
+        my ($self, $eml, $vmd) = @_;
+        add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd);
 }
 
 sub add_eml_maybe {
@@ -207,7 +207,7 @@ sub set_xkw {
         if ($lxs->xids_for($eml, 1)) { # is it in a local external?
                 # TODO: index keywords only
         } else {
-                set_eml($self, $eml, @$kw);
+                set_eml($self, $eml, { kw => $kw });
         }
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 27e1338f..5cea73e1 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -277,7 +277,7 @@ sub update_kw_maybe ($$$$) {
         return unless $lse;
         my $x = $lse->kw_changed($eml, $kw);
         if ($x) {
-                $lei->{sto}->ipc_do('set_eml', $eml, @$kw);
+                $lei->{sto}->ipc_do('set_eml', $eml, { kw => $kw });
         } elsif (!defined($x)) {
                 $lei->{sto}->ipc_do('set_xkw', $eml, $kw);
         }
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 772f5a64..e2a1a678 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -528,44 +528,61 @@ sub remove_eidx_info {
         $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub set_keywords {
-        my ($self, $docid, @kw) = @_;
+my @VMD_MAP = (kw => 'K', label => 'L');
+
+sub set_vmd {
+        my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        my %keep = map { $_ => 1 } @kw;
-        my %add = %keep;
-        my @rm;
-        my $end = $doc->termlist_end;
-        for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-                $cur->skip_to('K');
-                last if $cur == $end;
-                my $kw = $cur->get_termname;
-                $kw =~ s/\AK//s or next;
-                $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw);
+        my ($end, @rm, @add);
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $set = $vmd->{$field} // next;
+                my %keep = map { $_ => 1 } @$set;
+                my %add = %keep;
+                $end //= $doc->termlist_end;
+                for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+                        $cur->skip_to($pfx);
+                        last if $cur == $end;
+                        my $v = $cur->get_termname;
+                        $v =~ s/\A$pfx//s or next;
+                        $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
+                }
+                push(@add, map { $pfx.$_ } keys %add);
         }
-        return unless (scalar(@rm) + scalar(keys %add));
-        $doc->remove_term('K'.$_) for @rm;
-        $doc->add_boolean_term('K'.$_) for (keys %add);
+        return unless scalar(@rm) || scalar(@add);
+        $doc->remove_term($_) for @rm;
+        $doc->add_boolean_term($_) for @add;
         $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub add_keywords {
-        my ($self, $docid, @kw) = @_;
+sub add_vmd {
+        my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        $doc->add_boolean_term('K'.$_) for @kw;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $add = $vmd->{$field} // next;
+                $doc->add_boolean_term($pfx . $_) for @$add;
+        }
         $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub remove_keywords {
-        my ($self, $docid, @kw) = @_;
+sub remove_vmd {
+        my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
         my $replace;
-        eval {
-                $doc->remove_term('K'.$_);
-                $replace = 1
-        } for @kw;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $rm = $vmd->{$field} // next;
+                for (@$rm) {
+                        eval {
+                                $doc->remove_term($pfx . $_);
+                                $replace = 1;
+                        };
+                }
+        }
         $self->{xdb}->replace_document($docid, $doc) if $replace;
 }
 
diff --git a/t/lei_store.t b/t/lei_store.t
index d270e1f6..024ff527 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -36,37 +36,37 @@ $sto->done;
 
 for my $parallel (0, 1) {
         $sto->{priv_eidx}->{parallel} = $parallel;
-        my $docids = $sto->set_eml_keywords($eml, qw(seen draft));
+        my $docids = $sto->set_eml_vmd($eml, { kw => [ qw(seen draft) ] });
         is(scalar @$docids, 1, 'set keywords on one doc');
         $sto->done;
         my @kw = $sto->search->msg_keywords($docids->[0]);
         is_deeply(\@kw, [qw(draft seen)], 'kw matches');
 
-        $docids = $sto->add_eml_keywords($eml, qw(seen draft));
+        $docids = $sto->add_eml_vmd($eml, {kw => [qw(seen draft)]});
         $sto->done;
         is(scalar @$docids, 1, 'idempotently added keywords to doc');
         @kw = $sto->search->msg_keywords($docids->[0]);
         is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop');
 
-        $docids = $sto->remove_eml_keywords($eml, qw(seen draft));
+        $docids = $sto->remove_eml_vmd($eml, {kw => [qw(seen draft)]});
         is(scalar @$docids, 1, 'removed from one doc');
         $sto->done;
         @kw = $sto->search->msg_keywords($docids->[0]);
         is_deeply(\@kw, [], 'kw matches after remove');
 
-        $docids = $sto->remove_eml_keywords($eml, qw(answered));
+        $docids = $sto->remove_eml_vmd($eml, {kw=> [qw(answered)]});
         is(scalar @$docids, 1, 'removed from one doc (idempotently)');
         $sto->done;
         @kw = $sto->search->msg_keywords($docids->[0]);
         is_deeply(\@kw, [], 'kw matches after remove (idempotent)');
 
-        $docids = $sto->add_eml_keywords($eml, qw(answered));
+        $docids = $sto->add_eml_vmd($eml, {kw => [qw(answered)]});
         is(scalar @$docids, 1, 'added to empty doc');
         $sto->done;
         @kw = $sto->search->msg_keywords($docids->[0]);
         is_deeply(\@kw, ['answered'], 'kw matches after add');
 
-        $docids = $sto->set_eml_keywords($eml);
+        $docids = $sto->set_eml_vmd($eml, { kw => [] });
         is(scalar @$docids, 1, 'set to clobber');
         $sto->done;
         @kw = $sto->search->msg_keywords($docids->[0]);
@@ -74,11 +74,11 @@ for my $parallel (0, 1) {
 
         my $set = eml_load('t/plack-qp.eml');
         $set->header_set('Message-ID', "<set\@$parallel>");
-        my $ret = $sto->set_eml($set, 'seen');
+        my $ret = $sto->set_eml($set, { kw => [ 'seen' ] });
         is(ref $ret, 'PublicInbox::Smsg', 'initial returns smsg');
-        my $ids = $sto->set_eml($set, qw(seen));
+        my $ids = $sto->set_eml($set, { kw => [ 'seen' ] });
         is_deeply($ids, [ $ret->{num} ], 'set_eml idempotent');
-        $ids = $sto->set_eml($set, qw(seen answered));
+        $ids = $sto->set_eml($set, { kw => [ qw(seen answered) ] });
         is_deeply($ids, [ $ret->{num} ], 'set_eml to change kw');
         $sto->done;
         @kw = $sto->search->msg_keywords($ids->[0]);
@@ -91,23 +91,23 @@ SKIP: {
         $eml->header_set('Message-ID', '<ipc-test@example>');
         my $pid = $sto->ipc_worker_spawn('lei-store');
         ok($pid > 0, 'got a worker');
-        my $smsg = $sto->ipc_do('set_eml', $eml, qw(seen));
+        my $smsg = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
         is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc');
-        my $ids = $sto->ipc_do('set_eml', $eml, qw(seen));
+        my $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
         is_deeply($ids, [ $smsg->{num} ], 'docid returned');
 
         $eml->header_set('Message-ID');
-        my $no_mid = $sto->ipc_do('set_eml', $eml, qw(seen));
+        my $no_mid = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
         my $wait = $sto->ipc_do('done');
         my @kw = $sto->search->msg_keywords($no_mid->{num});
         is_deeply(\@kw, [qw(seen)], 'ipc set changed kw');
 
         is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc');
-        $ids = $sto->ipc_do('set_eml', $eml, qw(seen));
+        $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
         is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc');
         $sto->ipc_do('done');
         $sto->ipc_worker_stop;
-        $ids = $sto->ipc_do('set_eml', $eml, qw(seen answered));
+        $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen answered) ] });
         is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc');
         $wait = $sto->ipc_do('done');