about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-14 11:42:40 +0000
committer Eric Wong <e@80x24.org> 2020-12-19 09:32:08 +0000
commit 6cdb84af2c75b3c66a35c8c4973f455da15dd0a4 (patch)
tree 3069edc4159050252f1a6d15295b01a566f367d2 /lib/PublicInbox/SearchIdx.pm
parent 478a8d308d952af5ae957136c2ab09455f2a767c (diff)
download public-inbox-6cdb84af2c75b3c66a35c8c4973f455da15dd0a4.tar.gz
Still unstable, this builds off the equally unstable extindex :P

This will be used for caching/memoization of traditional mail
stores (IMAP, Maildir, etc.) while providing indexing via Xapian,
along with compression and checksumming from git.

Most notably, this adds the ability to add/remove per-message
keywords (draft, seen, flagged, answered) as described in the
JMAP specification (RFC 8621 section 4.1.1).

We'll use `.' (a single period) as an $eidx_key since it's an
invalid {inboxdir} or {newsgroup} name.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 47
1 files changed, 44 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index b731f698..548f2114 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1,6 +1,6 @@
 # Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# based on notmuch, but with no concept of folders, files or flags
+# based on notmuch, but with no concept of folders or files
 #
 # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
 # with the web and NNTP interfaces.  This index maintains thread
@@ -371,7 +371,7 @@ sub eml2doc ($$$;$) {
         index_headers($self, $smsg);
 
         if (defined(my $eidx_key = $smsg->{eidx_key})) {
-                $doc->add_boolean_term('O'.$eidx_key);
+                $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
         }
         msg_iter($eml, \&index_xapian, [ $self, $doc ]);
         index_ids($self, $doc, $eml, $mids);
@@ -467,7 +467,7 @@ sub add_eidx_info {
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid) or return;
         term_generator($self)->set_document($doc);
-        $doc->add_boolean_term('O'.$eidx_key);
+        $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
         index_list_id($self, $doc, $eml);
         $self->{xdb}->replace_document($docid, $doc);
 }
@@ -501,6 +501,47 @@ sub remove_eidx_info {
         $self->{xdb}->replace_document($docid, $doc);
 }
 
+# set_keywords($self, $docid, @kw): make the doc's 'K' terms exactly match @kw
+sub set_keywords {
+        my ($self, $docid, @kw) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        my %add = map { $_ => 1 } @kw; # pruned below to keywords not yet indexed
+        my @rm; # indexed keywords which are absent from @kw
+        my $end = $doc->termlist_end;
+        for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+                $cur->skip_to('K');
+                last if $cur == $end;
+                my $kw = $cur->get_termname;
+                $kw =~ s/\AK//s or last; # sorted terms: the 'K' run is over
+                delete($add{$kw}) or push(@rm, $kw);
+        }
+        return unless (scalar(@rm) + scalar(keys %add)); # already in sync
+        $doc->remove_term('K'.$_) for @rm;
+        $doc->add_boolean_term('K'.$_) for (keys %add);
+        $self->{xdb}->replace_document($docid, $doc);
+}
+
+sub add_keywords { # ($self, $docid, @kw): add one 'K'-prefixed term per keyword
+        my ($self, $docid, @keywords) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return; # _get_doc gave us nothing
+        foreach my $kw (@keywords) { $doc->add_boolean_term("K$kw") }
+        $self->{xdb}->replace_document($docid, $doc); # always written back
+}
+
+# remove_keywords($self, $docid, @kw): drop the 'K' term of each given keyword
+sub remove_keywords {
+        my ($self, $docid, @keywords) = @_;
+        begin_txn_lazy($self);
+        my $doc = _get_doc($self, $docid) or return;
+        my $dirty; # becomes true once a remove_term call has succeeded
+        foreach my $kw (@keywords) {
+                eval { $doc->remove_term("K$kw"); $dirty = 1 }; # failures ignored
+        }
+        $self->{xdb}->replace_document($docid, $doc) if $dirty;
+}
+
 sub get_val ($$) {
         my ($doc, $col) = @_;
         sortable_unserialise($doc->get_value($col));