From bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Thu, 3 Jun 2021 01:05:20 +0000
Subject: lei import: speed up kw updates for old IMAP messages

On a 4-core CPU, this speeds up "lei import" on a largish IMAP
inbox with 75K messages from ~21 minutes down to 40s.

Parallelizing with the new LeiImportKw WQ worker class gives a
near-linear speedup and brought the runtime down to ~5:40.

The new idx_fid_uid index on the "fid" and "uid" columns of
blob2num in mail_sync.sqlite3 brought us the final speedup.

An additional index on over.sqlite3#xref3(oidbin) did not help,
since idx_nntp already exists and speeds up the new
->oidbin_exists internal API.

I initially experimented with a separate "lei import-kw" command
but decided against it since it's useless outside of IMAP+JMAP
and would require extra cognitive overhead for both users and
hackers. So LeiImportKw is just a WQ worker used by "lei
import" and not its own user-visible command.

v2: fix ikw_done_wait arg handling (ugh, confusing API :x)
---
 lib/PublicInbox/Over.pm | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'lib/PublicInbox/Over.pm')

diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 0e191c47..58fdea0e 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -349,13 +349,13 @@ sub check_inodes {
 	}
 }
 
-sub blob_exists {
-	my ($self, $oidhex) = @_;
+sub oidbin_exists {
+	my ($self, $oidbin) = @_;
 	if (wantarray) {
 		my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
 SELECT docid FROM xref3 WHERE oidbin = ? ORDER BY docid ASC
 
-		$sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+		$sth->bind_param(1, $oidbin, SQL_BLOB);
 		$sth->execute;
 		my $tmp = $sth->fetchall_arrayref;
 		map { $_->[0] } @$tmp;
@@ -363,10 +363,12 @@ SELECT docid FROM xref3 WHERE oidbin = ? ORDER BY docid ASC
 		my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
 SELECT COUNT(*) FROM xref3 WHERE oidbin = ?
 
-		$sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+		$sth->bind_param(1, $oidbin, SQL_BLOB);
 		$sth->execute;
 		$sth->fetchrow_array;
 	}
 }
 
+sub blob_exists { oidbin_exists($_[0], pack('H*', $_[1])) }
+
 1;
-- 
cgit v1.2.3-24-ge0c7