about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-19 08:14:39 +0000
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-19 08:16:34 +0000
commit eb48e7d6675babdda9a36be1a490c29a2ccddbdc (patch)
tree eaa6056df3a6eabe9952b7265c9770de6b3575f1 /lib
parent b20b8747256433b0b7b4d1ed5c415d2101044dda (diff)
download public-inbox-eb48e7d6675babdda9a36be1a490c29a2ccddbdc.tar.gz
We need to hide removals from anybody hitting the search engine.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Msgmap.pm 8
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 32
-rw-r--r-- lib/PublicInbox/SearchIdxPart.pm 8
-rw-r--r-- lib/PublicInbox/SearchIdxSkeleton.pm 18
-rw-r--r-- lib/PublicInbox/SearchMsg.pm 4
-rw-r--r-- lib/PublicInbox/V2Writable.pm 51
6 files changed, 109 insertions, 12 deletions
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index a147b9f3..8e81fba0 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -140,6 +140,14 @@ sub mid_delete {
         $sth->execute;
 }
 
+# Delete the msgmap row whose article number is $num.
+sub num_delete {
+        my ($self, $num) = @_;
+        my $sth = $self->{dbh}->prepare('DELETE FROM msgmap WHERE num = ?');
+        $sth->bind_param(1, $num);
+        $sth->execute;
+}
+
 sub create_tables {
         my ($dbh) = @_;
         my $e;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ccec0181..ae2544da 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -440,6 +440,31 @@ sub remove_message {
         }
 }
 
+# delete 'Q'.$mid docs whose blob is $oid, returns count; MID is a hint in V2
+sub remove_by_oid {
+        my ($self, $oid, $mid) = @_;
+        my $db = $self->{xdb};
+
+        # XXX careful, we cannot use batch_do here since we conditionally
+        # delete documents based on other factors, so we cannot call
+        # find_doc_ids twice.
+        my ($head, $tail) = $self->find_doc_ids('Q' . $mid);
+        return if $head == $tail; # empty range: no candidates, no count
+
+        # there is only ONE element in @delete unless we
+        # have bugs in our v2writable deduplication check
+        my @delete;
+        for (; $head != $tail; $head->inc) {
+                my $docid = $head->get_docid;
+                my $doc = $db->get_document($docid);
+                my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
+                $smsg->load_expand; # presumably fills {blob} -- confirm
+                push(@delete, $docid) if $smsg->{blob} eq $oid;
+        }
+        $db->delete_document($_) foreach @delete; # only after the walk ends
+        scalar(@delete);
+}
+
 sub term_generator { # write-only
         my ($self) = @_;
 
@@ -896,4 +921,11 @@ sub remote_close {
         $? == 0 or die ref($self)." pid:$pid exited with: $?";
 }
 
+# ask the partition or skeleton worker loop to run remove_by_oid
+sub remote_remove {
+        my ($self, $oid, $mid) = @_;
+        my $cmd = "D $oid $mid\n";
+        print { $self->{w} } $cmd or die "failed to write remove $!";
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdxPart.pm b/lib/PublicInbox/SearchIdxPart.pm
index dd7ace67..c1660783 100644
--- a/lib/PublicInbox/SearchIdxPart.pm
+++ b/lib/PublicInbox/SearchIdxPart.pm
@@ -54,6 +54,14 @@ sub partition_worker_loop ($$$) {
                         $txn = undef;
                         print { $self->{skeleton}->{w} } "barrier $part\n" or
                                         die "write failed to skeleton: $!\n";
+                } elsif ($line =~ /\AD ([a-f0-9]{40,}) (.+)\n\z/s) {
+                        my ($oid, $mid) = ($1, $2);
+                        $xdb ||= $self->_xdb_acquire;
+                        if (!$txn) {
+                                $xdb->begin_transaction;
+                                $txn = 1;
+                        }
+                        $self->remove_by_oid($oid, $mid);
                 } else {
                         chomp $line;
                         my ($len, $artnum, $oid, $mid0) = split(/ /, $line);
diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm
index 4cb10f59..beb17b9f 100644
--- a/lib/PublicInbox/SearchIdxSkeleton.pm
+++ b/lib/PublicInbox/SearchIdxSkeleton.pm
@@ -73,6 +73,14 @@ sub skeleton_worker_loop {
                                 print $barrier_note "barrier_done\n" or die
                                         "print failed to barrier note: $!";
                         }
+                } elsif ($line =~ /\AD ([a-f0-9]{40,}) (.*)\n\z/s) {
+                        my ($oid, $mid) = ($1, $2);
+                        $xdb ||= $self->_xdb_acquire;
+                        if (!$txn) {
+                                $xdb->begin_transaction;
+                                $txn = 1;
+                        }
+                        $self->remove_by_oid($oid, $mid);
                 } else {
                         my $len = int($line);
                         my $n = read($r, my $msg, $len) or die "read: $!\n";
@@ -110,6 +118,16 @@ sub index_skeleton {
         die "print failed: $err\n" if $err;
 }
 
+# SUPER::remote_remove under the lock; always unlock, then rethrow errors
+sub remote_remove {
+        my ($self, $oid, $mid) = @_;
+        $self->_lock_acquire;
+        eval { $self->SUPER::remote_remove($oid, $mid) };
+        my $err = $@;
+        $self->_lock_release;
+        die $err if $err;
+}
+
 # values: [ TS, NUM, BYTES, LINES, MID, XPATH, doc_data ]
 sub index_skeleton_real ($$) {
         my ($self, $values) = @_;
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 23478a2a..a1cd0c28 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -64,7 +64,9 @@ sub load_doc {
 # :bytes and :lines metadata in RFC 3977
 sub bytes ($) { get_val($_[0]->{doc}, &PublicInbox::Search::BYTES) }
 sub lines ($) { get_val($_[0]->{doc}, &PublicInbox::Search::LINES) }
-sub num ($) { get_val($_[0]->{doc}, &PublicInbox::Search::NUM) }
+sub num ($) { # memoized in {num}; `&' defers the NUM lookup to runtime
+        $_[0]->{num} ||= get_val($_[0]->{doc}, &PublicInbox::Search::NUM)
+}
 
 sub __hdr ($$) {
         my ($self, $field) = @_;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index e673c252..656f0693 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -199,18 +199,47 @@ sub idx_init {
 }
 
 sub remove {
-        my ($self, $mime, $msg) = @_;
-        my $existing = $self->lookup_content($mime) or return;
-
-        # don't touch ghosts or already junked messages
-        return unless $existing->type eq 'mail';
-
-        # always write removals to the current (latest) git repo since
-        # we process chronologically
+        my ($self, $mime, $cmt_msg) = @_; # $cmt_msg is handed to $im->remove
+        $self->barrier;
+        $self->idx_init;
         my $im = $self->importer;
-        my ($cmt, undef) = $im->remove($mime, $msg);
-        $cmt = $im->get_mark($cmt);
-        $self->unindex_msg($existing, $cmt);
+        my $ibx = $self->{-inbox};
+        my $srch = $ibx->search;
+        my $cid = content_id($mime); # identity each candidate is compared to
+        my $skel = $self->{skel};
+        my $parts = $self->{idx_parts};
+        my $mm = $skel->{mm};
+        my $removed; # stays undef unless a stored copy matches $cid
+        my $mids = mids($mime->header_obj);
+        foreach my $mid (@$mids) {
+                $srch->reopen->each_smsg_by_mid($mid, sub {
+                        my ($smsg) = @_;
+                        $smsg->load_expand;
+                        my $msg = $ibx->msg_by_smsg($smsg);
+                        if (!defined($msg)) {
+                                warn "broken smsg for $mid\n";
+                                return 1; # continue
+                        }
+                        my $cur = PublicInbox::MIME->new($msg);
+                        if (content_id($cur) eq $cid) {
+                                $mm->num_delete($smsg->num); # drop msgmap row
+                                # $removed should only be set once assuming
+                                # no bugs in our deduplication code:
+                                $removed = $smsg;
+                                $removed->{mime} = $cur;
+                                $im->remove($cur, $cmt_msg);
+                                $removed->num; # memoize this for callers
+
+                                my $oid = $smsg->{blob};
+                                foreach my $idx (@$parts, $skel) {
+                                        $idx->remote_remove($oid, $mid);
+                                }
+                        }
+                        1; # continue
+                });
+                $self->barrier; # presumably syncs queued removals -- confirm
+        }
+        $removed; # last matching smsg (with {mime} set), or undef
 }
 
 sub done {