about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2021-03-17 15:39:22 +0600
committer: Eric Wong <e@80x24.org> 2021-03-17 19:03:15 +0000
commit: 86f7b16ee50081d4eed779372ccc198d8a1770dc (patch)
tree: 6af5a9310cce2ceace5bcc431f0adb93b1b57945 /lib/PublicInbox
parent: 4c6c853494b4936825741bb5e8885f1312639058 (diff)
download: public-inbox-86f7b16ee50081d4eed779372ccc198d8a1770dc.tar.gz
lei_store: keywords => vmd (volatile metadata), prepare for labels
Since keywords and mailboxes (AKA labels) are separate things in
JMAP, and only keywords can map reliably to Maildir and mbox,
we'll keep them separate in our internal data representations,
too.

I initially wanted to call this just "meta" for "metadata", but
that might be confused with our mailing list name.  "metadata"
is already used in Xapian's own API, to add another layer of
confusion.

"tags" was also considered, but probably confusing to notmuch
users since our "labels" are analogous to "tags" in notmuch,
and notmuch doesn't seem to cover "keywords" separately...

So "vmd" it is, since we haven't used this particular
three-letter abbreviation anywhere before, and "volatile" seems
like a good description of this metadata since everything else
up to this point has been mostly WORM (write-once, read-many).
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/LeiImport.pm 6
-rw-r--r-- lib/PublicInbox/LeiStore.pm 30
-rw-r--r-- lib/PublicInbox/LeiToMail.pm 2
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 65
4 files changed, 60 insertions, 43 deletions
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 65e37371..137c22fc 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,7 +12,7 @@ use PublicInbox::PktOp qw(pkt_do);
 sub _import_eml { # MboxReader callback
         my ($eml, $sto, $set_kw) = @_;
         $sto->ipc_do('set_eml', $eml, $set_kw ?
-                @{PublicInbox::MboxReader::mbox_keywords($eml)} : ());
+                { kw => PublicInbox::MboxReader::mbox_keywords($eml) } : ());
 }
 
 sub import_done_wait { # dwaitpid callback
@@ -150,12 +150,12 @@ error reading $input: $!
 
 sub _import_maildir { # maildir_each_eml cb
         my ($f, $kw, $eml, $sto, $set_kw) = @_;
-        $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+        $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ());
 }
 
 sub _import_net { # imap_each, nntp_each cb
         my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_;
-        $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+        $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw } : ());
 }
 
 sub import_path_url {
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 771443db..ae263914 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -129,38 +129,38 @@ sub _docids_for ($$) {
         sort { $a <=> $b } values %docids;
 }
 
-sub set_eml_keywords {
-        my ($self, $eml, @kw) = @_;
+sub set_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
         my $eidx = eidx_init($self);
         my @docids = _docids_for($self, $eml);
         for my $docid (@docids) {
-                $eidx->idx_shard($docid)->ipc_do('set_keywords', $docid, @kw);
+                $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
         }
         \@docids;
 }
 
-sub add_eml_keywords {
-        my ($self, $eml, @kw) = @_;
+sub add_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
         my $eidx = eidx_init($self);
         my @docids = _docids_for($self, $eml);
         for my $docid (@docids) {
-                $eidx->idx_shard($docid)->ipc_do('add_keywords', $docid, @kw);
+                $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd);
         }
         \@docids;
 }
 
-sub remove_eml_keywords {
-        my ($self, $eml, @kw) = @_;
+sub remove_eml_vmd {
+        my ($self, $eml, $vmd) = @_;
         my $eidx = eidx_init($self);
         my @docids = _docids_for($self, $eml);
         for my $docid (@docids) {
-                $eidx->idx_shard($docid)->ipc_do('remove_keywords', $docid, @kw)
+                $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd);
         }
         \@docids;
 }
 
 sub add_eml {
-        my ($self, $eml, @kw) = @_;
+        my ($self, $eml, $vmd) = @_;
         my $im = $self->importer; # may create new epoch
         my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates
         my $oidx = $eidx->{oidx};
@@ -174,7 +174,7 @@ sub add_eml {
                         $oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
                         # add_eidx_info for List-Id
                         $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
-                        $idx->ipc_do('add_keywords', $docid, @kw) if @kw;
+                        $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
                 }
                 \@docids;
         } else {
@@ -183,14 +183,14 @@ sub add_eml {
                 $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
                 my $idx = $eidx->idx_shard($smsg->{num});
                 $idx->index_eml($eml, $smsg);
-                $idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
+                $idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd;
                 $smsg;
         }
 }
 
 sub set_eml {
-        my ($self, $eml, @kw) = @_;
-        add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw);
+        my ($self, $eml, $vmd) = @_;
+        add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd);
 }
 
 sub add_eml_maybe {
@@ -207,7 +207,7 @@ sub set_xkw {
         if ($lxs->xids_for($eml, 1)) { # is it in a local external?
                 # TODO: index keywords only
         } else {
-                set_eml($self, $eml, @$kw);
+                set_eml($self, $eml, { kw => $kw });
         }
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 27e1338f..5cea73e1 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -277,7 +277,7 @@ sub update_kw_maybe ($$$$) {
         return unless $lse;
         my $x = $lse->kw_changed($eml, $kw);
         if ($x) {
-                $lei->{sto}->ipc_do('set_eml', $eml, @$kw);
+                $lei->{sto}->ipc_do('set_eml', $eml, { kw => $kw });
         } elsif (!defined($x)) {
                 $lei->{sto}->ipc_do('set_xkw', $eml, $kw);
         }
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 772f5a64..e2a1a678 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -528,44 +528,61 @@ sub remove_eidx_info {
         $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub set_keywords {
-        my ($self, $docid, @kw) = @_;
+my @VMD_MAP = (kw => 'K', label => 'L');
+
+sub set_vmd {
+        my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        my %keep = map { $_ => 1 } @kw;
-        my %add = %keep;
-        my @rm;
-        my $end = $doc->termlist_end;
-        for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-                $cur->skip_to('K');
-                last if $cur == $end;
-                my $kw = $cur->get_termname;
-                $kw =~ s/\AK//s or next;
-                $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw);
+        my ($end, @rm, @add);
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $set = $vmd->{$field} // next;
+                my %keep = map { $_ => 1 } @$set;
+                my %add = %keep;
+                $end //= $doc->termlist_end;
+                for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+                        $cur->skip_to($pfx);
+                        last if $cur == $end;
+                        my $v = $cur->get_termname;
+                        $v =~ s/\A$pfx//s or next;
+                        $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
+                }
+                push(@add, map { $pfx.$_ } keys %add);
         }
-        return unless (scalar(@rm) + scalar(keys %add));
-        $doc->remove_term('K'.$_) for @rm;
-        $doc->add_boolean_term('K'.$_) for (keys %add);
+        return unless scalar(@rm) || scalar(@add);
+        $doc->remove_term($_) for @rm;
+        $doc->add_boolean_term($_) for @add;
         $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub add_keywords {
-        my ($self, $docid, @kw) = @_;
+sub add_vmd {
+        my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
-        $doc->add_boolean_term('K'.$_) for @kw;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $add = $vmd->{$field} // next;
+                $doc->add_boolean_term($pfx . $_) for @$add;
+        }
         $self->{xdb}->replace_document($docid, $doc);
 }
 
-sub remove_keywords {
-        my ($self, $docid, @kw) = @_;
+sub remove_vmd {
+        my ($self, $docid, $vmd) = @_;
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
         my $replace;
-        eval {
-                $doc->remove_term('K'.$_);
-                $replace = 1
-        } for @kw;
+        my @x = @VMD_MAP;
+        while (my ($field, $pfx) = splice(@x, 0, 2)) {
+                my $rm = $vmd->{$field} // next;
+                for (@$rm) {
+                        eval {
+                                $doc->remove_term($pfx . $_);
+                                $replace = 1;
+                        };
+                }
+        }
         $self->{xdb}->replace_document($docid, $doc) if $replace;
 }