about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-10-27 07:54:22 +0000
committer Eric Wong <e@80x24.org> 2020-11-07 10:18:53 +0000
commit 98c03415fed31cce80f8b2030aee9311890c79d6 (patch)
tree f066f704d796f3a7e00e6268868189accf95b793 /lib/PublicInbox/SearchIdx.pm
parent cf27d2a29bf2a0c8cb670b9d775fab405ace9f79 (diff)
download public-inbox-98c03415fed31cce80f8b2030aee9311890c79d6.tar.gz
Not yet tested, but Perl compiles it!
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 50
1 files changed, 31 insertions, 19 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 5171c610..0458d9c3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -472,29 +472,41 @@ sub remove_xref3 {
         begin_txn_lazy($self);
         my $doc = _get_doc($self, $docid, $oid) or return;
         my $xref3 = PublicInbox::Smsg::xref3(undef, $doc);
+        my %x3 = map { $_ => undef } @$xref3;
         for (grep(/\A\Q$eidx_key\E:[0-9]+:\Q$oid\E\z/, @$xref3)) {
+                delete $x3{$_};
                 $doc->remove_term('P' . $_);
         }
-        for my $l ($eml->header_raw('List-Id')) {
-                $l =~ /<([^>]+)>/ or next;
-                my $lid = lc $1;
-                $doc->remove_term('G' . $lid);
-
-                # nb: we don't remove the XL probabilistic terms
-                # since terms may overlap if cross-posted.
-                #
-                # IOW, a message which has both <foo.example.com>
-                # and <bar.example.com> would have overlapping
-                # "XLexample" and "XLcom" as terms and which we
-                # wouldn't know if they're safe to remove if we just
-                # unindex <foo.example.com> while preserving
-                # <bar.example.com>.
-                #
-                # In any case, this entire sub is will likely never
-                # be needed and users using the "l:" prefix are probably
-                # rarer.
+        if (scalar(keys(%x3)) == 0) {
+                $self->{xdb}->delete_document($docid);
+                if (my $del_fh = $self->{del_fh}) { # TODO
+                        print $del_fh $docid, "\n" or die "E: print $!";
+                }
+        } else {
+                if (!grep(/\A\Q$eidx_key\E:/, keys %x3)) {
+                        $doc->remove_term('O'.$eidx_key);
+                }
+                for my $l ($eml->header_raw('List-Id')) {
+                        $l =~ /<([^>]+)>/ or next;
+                        my $lid = lc $1;
+                        $doc->remove_term('G' . $lid);
+
+                        # nb: we don't remove the XL probabilistic terms
+                        # since terms may overlap if cross-posted.
+                        #
+                        # IOW, a message which has both <foo.example.com>
+                        # and <bar.example.com> would have overlapping
+                        # "XLexample" and "XLcom" as terms and which we
+                        # wouldn't know if they're safe to remove if we just
+                        # unindex <foo.example.com> while preserving
+                        # <bar.example.com>.
+                        #
+                        # In any case, this entire sub is will likely never
+                        # be needed and users using the "l:" prefix are probably
+                        # rarer.
+                }
+                $self->{xdb}->replace_document($docid, $doc);
         }
-        $self->{xdb}->replace_document($docid, $doc);
 }
 
 sub get_val ($$) {