diff options
author | Eric Wong <e@80x24.org> | 2020-12-14 11:42:40 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-12-19 09:32:08 +0000 |
commit | 6cdb84af2c75b3c66a35c8c4973f455da15dd0a4 (patch) | |
tree | 3069edc4159050252f1a6d15295b01a566f367d2 /lib/PublicInbox/SearchIdx.pm | |
parent | 478a8d308d952af5ae957136c2ab09455f2a767c (diff) | |
download | public-inbox-6cdb84af2c75b3c66a35c8c4973f455da15dd0a4.tar.gz |
Still unstable, this builds off the equally unstable extindex :P

This will be used for caching/memoization of traditional mail stores (IMAP, Maildir, etc.) while providing indexing via Xapian, along with compression and checksumming from git.

Most notably, this adds the ability to add/remove per-message keywords (draft, seen, flagged, answered) as described in the JMAP specification (RFC 8621 section 4.1.1).

We'll use `.' (a single period) as an $eidx_key since it's an invalid {inboxdir} or {newsgroup} name.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 65 |
1 files changed, 62 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index b731f698..548f2114 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1,6 +1,6 @@
 # Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# based on notmuch, but with no concept of folders, files or flags
+# based on notmuch, but with no concept of folders, files
 #
 # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
 # with the web and NNTP interfaces. This index maintains thread
@@ -371,7 +371,7 @@ sub eml2doc ($$$;$) {
 	index_headers($self, $smsg);
 
 	if (defined(my $eidx_key = $smsg->{eidx_key})) {
-		$doc->add_boolean_term('O'.$eidx_key);
+		$doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
 	}
 	msg_iter($eml, \&index_xapian, [ $self, $doc ]);
 	index_ids($self, $doc, $eml, $mids);
@@ -467,7 +467,7 @@ sub add_eidx_info {
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
 	term_generator($self)->set_document($doc);
-	$doc->add_boolean_term('O'.$eidx_key);
+	$doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
 	index_list_id($self, $doc, $eml);
 	$self->{xdb}->replace_document($docid, $doc);
 }
@@ -501,6 +501,65 @@ sub remove_eidx_info {
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
+# Make the 'K'-prefixed boolean terms of $docid match @kw exactly.
+# Terms already present are kept as-is and the document is only
+# rewritten when at least one term has to be added or removed.
+# Returns without writing when _get_doc() returns false for $docid.
+sub set_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid) or return;
+	my %keep = map { $_ => 1 } @kw;
+	my %add = %keep; # shrinks to the keywords not yet on $doc
+	my @rm; # existing keywords absent from @kw
+	my $end = $doc->termlist_end;
+	for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+		$cur->skip_to('K');
+		last if $cur == $end;
+		my $kw = $cur->get_termname;
+		# terms are iterated in sorted order (skip_to relies on it),
+		# so the first term at or after 'K' that lacks the prefix
+		# means no keyword terms remain
+		$kw =~ s/\AK//s or last;
+		if ($keep{$kw}) {
+			delete $add{$kw};
+		} else {
+			push @rm, $kw;
+		}
+	}
+	return unless (scalar(@rm) + scalar(keys %add));
+	$doc->remove_term('K'.$_) for @rm;
+	$doc->add_boolean_term('K'.$_) for (keys %add);
+	$self->{xdb}->replace_document($docid, $doc);
+}
+
+# Add a 'K'-prefixed boolean term to $docid for every entry of @kw.
+# Writes nothing when @kw is empty or _get_doc() returns false.
+sub add_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid) or return;
+	return unless @kw; # nothing to add, avoid a pointless rewrite
+	$doc->add_boolean_term('K'.$_) for @kw;
+	$self->{xdb}->replace_document($docid, $doc);
+}
+
+# Drop the 'K'-prefixed boolean term of each @kw entry from $docid.
+# The document is only rewritten if at least one removal succeeded.
+sub remove_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid) or return;
+	my $replace;
+	# each removal gets its own eval so that one failing term
+	# (e.g. Xapian rejecting a term that is not on the document)
+	# neither aborts the loop nor counts as a change
+	eval {
+		$doc->remove_term('K'.$_);
+		$replace = 1
+	} for @kw;
+	$self->{xdb}->replace_document($docid, $doc) if $replace;
+}
+
 sub get_val ($$) {
 	my ($doc, $col) = @_;
 	sortable_unserialise($doc->get_value($col));