diff options
author | Eric Wong <e@80x24.org> | 2021-06-08 09:50:21 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-06-08 16:50:47 +0000 |
commit | 10b523eb017162240b1ac3647f8dcbbf2be348a7 (patch) | |
tree | 9ea63ea4c4919556a1bf5b335f365372dfa1c84a /lib/PublicInbox/LeiMailSync.pm | |
parent | ba34a69490dce6ea3ba85ee5416b6590fa0c0a39 (diff) | |
download | public-inbox-10b523eb017162240b1ac3647f8dcbbf2be348a7.tar.gz |
On a 4-core CPU, this speeds up "lei import" on a largish Maildir inbox with 75K messages from ~8 minutes down to ~40s. Parallelizing alone did not bring any improvement and may even hurt performance slightly, depending on CPU availability. However, creating the index on the "fid" and "name" columns in blob2name yields that same speedup on its own. Parallelizing IMAP makes more sense because most IMAP stores are non-local and subject to network latency. Followup-to: bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3 ("lei import: speed up kw updates for old IMAP messages")
Diffstat (limited to 'lib/PublicInbox/LeiMailSync.pm')
-rw-r--r-- | lib/PublicInbox/LeiMailSync.pm | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 75603d89..ec05404a 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -66,6 +66,10 @@ CREATE TABLE IF NOT EXISTS blob2name (
 	UNIQUE (oidbin, fid, name)
 )
 
+	# speeds up LeiImport->pmdir_cb (for "lei import") by ~6x:
+	$dbh->do(<<'');
+CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)
+
 }
 
 sub fid_for {
@@ -375,6 +379,16 @@ EOM
 	$sth->fetchrow_array;
 }
 
+sub name_oidbin ($$$) {
+	my ($self, $mdir, $nm) = @_;
+	my $fid = $self->{fmap}->{$mdir} //= fid_for($self, $mdir) // return;
+	my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
+EOM
+	$sth->execute($fid, $nm);
+	$sth->fetchrow_array;
+}
+
 sub imap_oid {
 	my ($self, $lei, $uid_uri) = @_;
 	my $mailbox_uri = $uid_uri->clone;