diff options
author | Eric Wong <e@80x24.org> | 2021-06-03 01:05:20 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-06-03 01:09:43 +0000 |
commit | bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3 (patch) | |
tree | 33616d6248bf6b8d2a78d2a609f5ef8389b36b47 /lib/PublicInbox/LeiImportKw.pm | |
parent | 6ff03ba2be9247f1ead26c2524fadc789de558f1 (diff) | |
download | public-inbox-bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3.tar.gz |
On a 4-core CPU, this speeds up "lei import" on a largish IMAP inbox with 75K messages from ~21 minutes down to 40s.

Parallelizing with the new LeiImportKw WQ worker class gave a near-linear speedup and brought the runtime down to ~5:40. The new idx_fid_uid index on the "fid" and "uid" columns of blob2num in mail_sync.sqlite3 then brought us the final speedup (down to the 40s above).

An additional index on over.sqlite3#xref3(oidbin) did not help, since idx_nntp already exists and speeds up the new ->oidbin_exists internal API.

I initially experimented with a separate "lei import-kw" command but decided against it, since it would be useless outside of IMAP+JMAP and would add cognitive overhead for both users and hackers. So LeiImportKw is just a WQ worker used by "lei import" and not its own user-visible command.

v2: fix ikw_done_wait arg handling (ugh, confusing API :x)
Diffstat (limited to 'lib/PublicInbox/LeiImportKw.pm')
-rw-r--r-- | lib/PublicInbox/LeiImportKw.pm | 55 |
1 files changed, 55 insertions, 0 deletions
# Copyright (C) 2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# WQ worker for dealing with LeiImport IMAP flags on already-imported messages
# WQ key: {ikw}
package PublicInbox::LeiImportKw;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);

# Constructor.
#   $cls - class name to bless into
#   $lei - lei object; must provide ->workers_start
# Starts the workers through $lei->workers_start, stores the new object
# in $lei->{ikw} and returns it.
sub new {
    my ($cls, $lei) = @_;
    my $self = bless { -wq_ident => 'lei import_kw worker' }, $cls;

    # ->detect_nproc is deliberately left inline in the argument list so
    # that it is evaluated in the same context as it always was.
    my ($op_c, $ops) = $lei->workers_start($self, $self->detect_nproc);
    $op_c->{ops} = $ops; # for PktOp->event_step

    $lei->{ikw} = $self;
    return $self;
}

# Per-worker setup; presumably invoked in each forked worker by
# PublicInbox::IPC (TODO confirm against the parent class).
# Runs $lei->_lei_atfork_child, then caches on $self the handles used by
# ck_update_kw: {sto}, {verbose}, {lse}, {over} and {-lms_ro}.
# Dies with a "BUG:" message when $lei->{net} or $lei->{sto} is undefined
# or when $net->{-lms_ro} is false.
# Returns whatever the parent class' ipc_atfork_child returns.
sub ipc_atfork_child {
    my ($self) = @_;
    my $lei = $self->{lei};
    $lei->_lei_atfork_child;

    # {net} is removed from $lei here; only its {-lms_ro} member is kept
    my $net = delete($lei->{net}) // die 'BUG: no lei->{net}';

    my $sto = $lei->{sto} // die 'BUG: no lei->{sto}';
    $self->{sto} = $sto;
    $self->{verbose} = $lei->{opt}->{verbose};

    my $lse = $sto->search;
    $self->{lse} = $lse;
    $self->{over} = $lse->over;

    # `||' (not `//'): any false value is treated as a bug here
    $self->{-lms_ro} = $net->{-lms_ro} || die 'BUG: net->{-lms_ro} FALSE';

    return $self->SUPER::ipc_atfork_child;
}

# Checks whether the keywords of one IMAP message need updating and, if
# so, asks {sto} to apply them.
#   $url, $uid - passed to {-lms_ro}->imap_oidbin to look up the blob OID
#   $kw        - array ref of keywords
# Returns early (bare return) when no OID is known for ($url, $uid), when
# {over}->oidbin_exists yields no docids, or when {lse}->kw_changed
# reports false.  Otherwise returns the result of the
# {sto}->ipc_do('set_eml_vmd', ...) call.
sub ck_update_kw { # via wq_io_do
    my ($self, $url, $uid, $kw) = @_;

    my $oidbin = $self->{-lms_ro}->imap_oidbin($url, $uid);
    return unless defined $oidbin;

    my @docids = $self->{over}->oidbin_exists($oidbin);
    return unless @docids;

    return unless $self->{lse}->kw_changed(undef, $kw, \@docids);

    if ($self->{verbose}) {
        # hex OID followed by the keywords about to be set
        $self->{lei}->qerr('# '.unpack('H*', $oidbin)." => @$kw\n");
    }

    # must remain the final expression so the caller's context reaches
    # ipc_do unchanged
    return $self->{sto}->ipc_do('set_eml_vmd', undef, { kw => $kw }, \@docids);
}

# Wait callback handed to wq_wait_old by _lei_wq_eof.
#   $arg - array ref whose second element is the lei object
#   $pid - passed through untouched to wq_done_wait
# Issues 'done' to $lei->{sto}, then delegates to $lei's wq_done_wait
# with the original arguments and returns its result.
sub ikw_done_wait {
    my ($arg, $pid) = @_;
    my $lei = $arg->[1];

    # NOTE(review): the scalar assignment is kept on purpose even though
    # the value is never read; ipc_do may behave differently in void
    # context (e.g. not wait for completion) -- confirm before removing.
    my $wait = $lei->{sto}->ipc_do('done');

    my $wq_done_wait = $lei->can('wq_done_wait');
    return $wq_done_wait->($arg, $pid);
}

# Detaches the worker from $lei->{ikw} and registers ikw_done_wait via
# wq_wait_old.  Returns $lei->fail when no worker was registered.
sub _lei_wq_eof { # EOF callback for main lei daemon
    my ($lei) = @_;

    my $ikw = delete $lei->{ikw};
    return $lei->fail unless $ikw;

    return $ikw->wq_wait_old(\&ikw_done_wait, $lei);
}

1;