From bbb4e900af92c66a37ccf619fb73a8f97a99f3f5 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 25 Jul 2021 00:11:03 +0000
Subject: extsearchidx: use more appropriate max for dedupe

The over.msgid table may contain ghost Message-IDs and also
Message-IDs of deleted spam messages, so over->max isn't a good
approximation of dedupe progress.
---
 lib/PublicInbox/ExtSearchIdx.pm | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib/PublicInbox/ExtSearchIdx.pm')

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 1c2a9758..51dbf54f 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -896,7 +896,10 @@ sub eidx_dedupe ($$$) {
 	my ($iter, $cur_mid);
 	my $min_id = 0;
 	my $idx = 0;
-	local $sync->{-regen_fmt} = "dedupe %u/".$self->{oidx}->max."\n";
+	my ($max_id) = $self->{oidx}->dbh->selectrow_array(<<EOS);
+SELECT MAX(id) FROM msgid
+EOS
+	local $sync->{-regen_fmt} = "dedupe %u/$max_id\n";
 
 	# note: we could write this query more intelligently,
 	# but that causes lock contention with read-only processes
-- 
cgit v1.2.3-24-ge0c7