diff options
-rw-r--r-- | Documentation/public-inbox-glossary.pod | 13 | ||||
-rw-r--r-- | lib/PublicInbox/LeiImport.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/LeiStore.pm | 30 | ||||
-rw-r--r-- | lib/PublicInbox/LeiToMail.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 65 | ||||
-rw-r--r-- | t/lei_store.t | 28 |
6 files changed, 85 insertions, 59 deletions
diff --git a/Documentation/public-inbox-glossary.pod b/Documentation/public-inbox-glossary.pod index 61e1e9f8..10b3f9d6 100644 --- a/Documentation/public-inbox-glossary.pod +++ b/Documentation/public-inbox-glossary.pod @@ -69,8 +69,8 @@ L<public-inbox-nntpd(1)> or L<public-inbox-imapd(1)> Private, per-message keywords or flags as described in RFC 8621 section 10.4. These are conveyed in the C<Status:> and -C<X-Status:> headers for L<mbox(5)>, as IMAP FLAGS (RFC 3501 section 2.3.2), -or Maildir info flags. +C<X-Status:> headers for L<mbox(5)>, as system IMAP FLAGS +(RFC 3501 section 2.3.2), or Maildir info flags. L<public-inbox-watch(1)> ignores drafts and trashed (deleted) messages. L<lei-import(1)> ignores trashed (deleted) messages, @@ -83,6 +83,15 @@ the same email into one or more virtual folders for ease-of-filtering. This is NOT tied to public-inbox names, as messages stored by lei may not be public. +These are similar in spirit to arbitrary freeform "tags" +in mail software such as L<notmuch(1)> and non-system IMAP FLAGS. + +=item volatile metadata (VMD) + +For L<lei(1)> users only, this refers to the combination of +keywords and labels which are subject to frequent change +independently of immutable message content. + =item IMAP INTERNALDATE, JMAP receivedAt, rt: search prefix The first valid timestamp value of Received: headers (top first). diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 65e37371..137c22fc 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -12,7 +12,7 @@ use PublicInbox::PktOp qw(pkt_do); sub _import_eml { # MboxReader callback my ($eml, $sto, $set_kw) = @_; $sto->ipc_do('set_eml', $eml, $set_kw ? - @{PublicInbox::MboxReader::mbox_keywords($eml)} : ()); + { kw => PublicInbox::MboxReader::mbox_keywords($eml) } : ()); } sub import_done_wait { # dwaitpid callback @@ -150,12 +150,12 @@ error reading $input: $! sub _import_maildir { # maildir_each_eml cb my ($f, $kw, $eml, $sto, $set_kw) = @_; - $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ()); + $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ()); } sub _import_net { # imap_each, nntp_each cb my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_; - $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ()); + $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw } : ()); } sub import_path_url { diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 771443db..ae263914 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -129,38 +129,38 @@ sub _docids_for ($$) { sort { $a <=> $b } values %docids; } -sub set_eml_keywords { - my ($self, $eml, @kw) = @_; +sub set_eml_vmd { + my ($self, $eml, $vmd) = @_; my $eidx = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { - $eidx->idx_shard($docid)->ipc_do('set_keywords', $docid, @kw); + $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd); } \@docids; } -sub add_eml_keywords { - my ($self, $eml, @kw) = @_; +sub add_eml_vmd { + my ($self, $eml, $vmd) = @_; my $eidx = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { - $eidx->idx_shard($docid)->ipc_do('add_keywords', $docid, @kw); + $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd); } \@docids; } -sub remove_eml_keywords { - my ($self, $eml, @kw) = @_; +sub remove_eml_vmd { + my ($self, $eml, $vmd) = @_; my $eidx = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { - $eidx->idx_shard($docid)->ipc_do('remove_keywords', $docid, @kw) + $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd); } \@docids; } sub add_eml { - my ($self, $eml, @kw) = @_; + my ($self, $eml, $vmd) = @_; my $im = $self->importer; # may create new epoch my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates my $oidx = $eidx->{oidx}; @@ -174,7 +174,7 @@ sub add_eml { $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); # add_eidx_info for List-Id $idx->ipc_do('add_eidx_info', $docid, '.', $eml); - $idx->ipc_do('add_keywords', $docid, @kw) if @kw; + $idx->ipc_do('add_vmd', $docid, $vmd) if $vmd; } \@docids; } else { @@ -183,14 +183,14 @@ sub add_eml { $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_eml($eml, $smsg); - $idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw; + $idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd; $smsg; } } sub set_eml { - my ($self, $eml, @kw) = @_; - add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw); + my ($self, $eml, $vmd) = @_; + add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd); } sub add_eml_maybe { @@ -207,7 +207,7 @@ sub set_xkw { if ($lxs->xids_for($eml, 1)) { # is it in a local external? # TODO: index keywords only } else { - set_eml($self, $eml, @$kw); + set_eml($self, $eml, { kw => $kw }); } } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 27e1338f..5cea73e1 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -277,7 +277,7 @@ sub update_kw_maybe ($$$$) { return unless $lse; my $x = $lse->kw_changed($eml, $kw); if ($x) { - $lei->{sto}->ipc_do('set_eml', $eml, @$kw); + $lei->{sto}->ipc_do('set_eml', $eml, { kw => $kw }); } elsif (!defined($x)) { $lei->{sto}->ipc_do('set_xkw', $eml, $kw); } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 772f5a64..e2a1a678 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -528,44 +528,61 @@ sub remove_eidx_info { $self->{xdb}->replace_document($docid, $doc); } -sub set_keywords { - my ($self, $docid, @kw) = @_; +my @VMD_MAP = (kw => 'K', label => 'L'); + +sub set_vmd { + my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; - my %keep = map { $_ => 1 } @kw; - my %add = %keep; - my @rm; - my $end = $doc->termlist_end; - for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { - $cur->skip_to('K'); - last if $cur == $end; - my $kw = $cur->get_termname; - $kw =~ s/\AK//s or next; - $keep{$kw} ? delete($add{$kw}) : push(@rm, $kw); + my ($end, @rm, @add); + my @x = @VMD_MAP; + while (my ($field, $pfx) = splice(@x, 0, 2)) { + my $set = $vmd->{$field} // next; + my %keep = map { $_ => 1 } @$set; + my %add = %keep; + $end //= $doc->termlist_end; + for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { + $cur->skip_to($pfx); + last if $cur == $end; + my $v = $cur->get_termname; + $v =~ s/\A$pfx//s or next; + $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v); + } + push(@add, map { $pfx.$_ } keys %add); } - return unless (scalar(@rm) + scalar(keys %add)); - $doc->remove_term('K'.$_) for @rm; - $doc->add_boolean_term('K'.$_) for (keys %add); + return unless scalar(@rm) || scalar(@add); + $doc->remove_term($_) for @rm; + $doc->add_boolean_term($_) for @add; $self->{xdb}->replace_document($docid, $doc); } -sub add_keywords { - my ($self, $docid, @kw) = @_; +sub add_vmd { + my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; - $doc->add_boolean_term('K'.$_) for @kw; + my @x = @VMD_MAP; + while (my ($field, $pfx) = splice(@x, 0, 2)) { + my $add = $vmd->{$field} // next; + $doc->add_boolean_term($pfx . $_) for @$add; + } $self->{xdb}->replace_document($docid, $doc); } -sub remove_keywords { - my ($self, $docid, @kw) = @_; +sub remove_vmd { + my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; my $replace; - eval { - $doc->remove_term('K'.$_); - $replace = 1 - } for @kw; + my @x = @VMD_MAP; + while (my ($field, $pfx) = splice(@x, 0, 2)) { + my $rm = $vmd->{$field} // next; + for (@$rm) { + eval { + $doc->remove_term($pfx . $_); + $replace = 1; + }; + } + } $self->{xdb}->replace_document($docid, $doc) if $replace; } diff --git a/t/lei_store.t b/t/lei_store.t index d270e1f6..024ff527 100644 --- a/t/lei_store.t +++ b/t/lei_store.t @@ -36,37 +36,37 @@ $sto->done; for my $parallel (0, 1) { $sto->{priv_eidx}->{parallel} = $parallel; - my $docids = $sto->set_eml_keywords($eml, qw(seen draft)); + my $docids = $sto->set_eml_vmd($eml, { kw => [ qw(seen draft) ] }); is(scalar @$docids, 1, 'set keywords on one doc'); $sto->done; my @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [qw(draft seen)], 'kw matches'); - $docids = $sto->add_eml_keywords($eml, qw(seen draft)); + $docids = $sto->add_eml_vmd($eml, {kw => [qw(seen draft)]}); $sto->done; is(scalar @$docids, 1, 'idempotently added keywords to doc'); @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop'); - $docids = $sto->remove_eml_keywords($eml, qw(seen draft)); + $docids = $sto->remove_eml_vmd($eml, {kw => [qw(seen draft)]}); is(scalar @$docids, 1, 'removed from one doc'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [], 'kw matches after remove'); - $docids = $sto->remove_eml_keywords($eml, qw(answered)); + $docids = $sto->remove_eml_vmd($eml, {kw=> [qw(answered)]}); is(scalar @$docids, 1, 'removed from one doc (idempotently)'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [], 'kw matches after remove (idempotent)'); - $docids = $sto->add_eml_keywords($eml, qw(answered)); + $docids = $sto->add_eml_vmd($eml, {kw => [qw(answered)]}); is(scalar @$docids, 1, 'added to empty doc'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, ['answered'], 'kw matches after add'); - $docids = $sto->set_eml_keywords($eml); + $docids = $sto->set_eml_vmd($eml, { kw => [] }); is(scalar @$docids, 1, 'set to clobber'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); @@ -74,11 +74,11 @@ for my $parallel (0, 1) { my $set = eml_load('t/plack-qp.eml'); $set->header_set('Message-ID', "<set\@$parallel>"); - my $ret = $sto->set_eml($set, 'seen'); + my $ret = $sto->set_eml($set, { kw => [ 'seen' ] }); is(ref $ret, 'PublicInbox::Smsg', 'initial returns smsg'); - my $ids = $sto->set_eml($set, qw(seen)); + my $ids = $sto->set_eml($set, { kw => [ 'seen' ] }); is_deeply($ids, [ $ret->{num} ], 'set_eml idempotent'); - $ids = $sto->set_eml($set, qw(seen answered)); + $ids = $sto->set_eml($set, { kw => [ qw(seen answered) ] }); is_deeply($ids, [ $ret->{num} ], 'set_eml to change kw'); $sto->done; @kw = $sto->search->msg_keywords($ids->[0]); @@ -91,23 +91,23 @@ SKIP: { $eml->header_set('Message-ID', '<ipc-test@example>'); my $pid = $sto->ipc_worker_spawn('lei-store'); ok($pid > 0, 'got a worker'); - my $smsg = $sto->ipc_do('set_eml', $eml, qw(seen)); + my $smsg = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc'); - my $ids = $sto->ipc_do('set_eml', $eml, qw(seen)); + my $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); is_deeply($ids, [ $smsg->{num} ], 'docid returned'); $eml->header_set('Message-ID'); - my $no_mid = $sto->ipc_do('set_eml', $eml, qw(seen)); + my $no_mid = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); my $wait = $sto->ipc_do('done'); my @kw = $sto->search->msg_keywords($no_mid->{num}); is_deeply(\@kw, [qw(seen)], 'ipc set changed kw'); is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc'); - $ids = $sto->ipc_do('set_eml', $eml, qw(seen)); + $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc'); $sto->ipc_do('done'); $sto->ipc_worker_stop; - $ids = $sto->ipc_do('set_eml', $eml, qw(seen answered)); + $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen answered) ] }); is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc'); $wait = $sto->ipc_do('done'); |