about summary refs log tree commit homepage
path: root/lib/PublicInbox/OverIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-07-06 12:42:02 +0000
committer Eric Wong <e@80x24.org> 2021-07-06 13:36:54 +0000
commit 8ef622d408d2e4d98ad3aada8466f539c9ac61ba (patch)
tree 5a66df2970f98c9cb591b615d0f94e28a7f9b32c /lib/PublicInbox/OverIdx.pm
parent f1f2464064af3840f2f1a697b638e5b769f111af (diff)
download public-inbox-8ef622d408d2e4d98ad3aada8466f539c9ac61ba.tar.gz
The "-extindex --dedupe" switch is intended to fix older indices
that had deduplication bugs for matching content.  It will also
make it easier to deal with future changes to ContentHash, since
its output is never guaranteed to be stable.

It also supports --dry-run, which only prints the changes
without making them.
Diffstat (limited to 'lib/PublicInbox/OverIdx.pm')
-rw-r--r-- lib/PublicInbox/OverIdx.pm 20
1 file changed, 20 insertions, 0 deletions
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 5f96a5b0..8f7cf2bb 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -656,6 +656,26 @@ UPDATE over SET ddd = ? WHERE num = ?
         $sth->execute;
 }
 
+sub merge_xref3 { # used for "-extindex --dedupe": re-point xref3 rows matching ($drop_docid, $oidhex) at $keep_docid
+        my ($self, $keep_docid, $drop_docid, $oidhex) = @_;
+        my $oidbin = pack('H*', $oidhex); # hex string -> packed bytes, compared against xref3.oidbin
+        my $sth = $self->{dbh}->prepare_cached(<<''); # SQL heredoc runs up to the next empty line
+UPDATE OR IGNORE xref3 SET docid = ? WHERE docid = ? AND oidbin = ?

+        $sth->bind_param(1, $keep_docid);
+        $sth->bind_param(2, $drop_docid);
+        $sth->bind_param(3, $oidbin, SQL_BLOB); # bind the packed bytes with the SQL_BLOB type
+        $sth->execute;

+        # drop anything that conflicted: a row skipped by "OR IGNORE" above still has docid = $drop_docid
+        $sth = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND oidbin = ?

+        $sth->bind_param(1, $drop_docid);
+        $sth->bind_param(2, $oidbin, SQL_BLOB);
+        $sth->execute;
+}
+
 sub eidxq_add {
         my ($self, $docid) = @_;
         $self->dbh->prepare_cached(<<'')->execute($docid);