about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiImportKw.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-06-03 01:05:20 +0000
committer Eric Wong <e@80x24.org> 2021-06-03 01:09:43 +0000
commit bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3 (patch)
tree 33616d6248bf6b8d2a78d2a609f5ef8389b36b47 /lib/PublicInbox/LeiImportKw.pm
parent 6ff03ba2be9247f1ead26c2524fadc789de558f1 (diff)
download public-inbox-bdecd7ed8e0dcf0b45491b947cd737ba8cfe38a3.tar.gz
On a 4-core CPU, this speeds up "lei import" on a largish IMAP
inbox with 75K messages from ~21 minutes down to 40s.

Parallelizing with the new LeiImportKw WQ worker class gives a
near-linear speedup and brought the runtime down to ~5:40.

The new idx_fid_uid index on the "fid" and "uid" columns of
blob2num in mail_sync.sqlite3 brought us the final speedup.

An additional index on over.sqlite3#xref3(oidbin) did not help,
since idx_nntp already exists and speeds up the new ->oidbin_exists
internal API.

I initially experimented with a separate "lei import-kw" command
but decided against it since it's useless outside of IMAP+JMAP
and would require extra cognitive overhead for both users and
hackers.  So LeiImportKw is just a WQ worker used by "lei import"
and not its own user-visible command.

v2: fix ikw_done_wait arg handling (ugh, confusing API :x)
Diffstat (limited to 'lib/PublicInbox/LeiImportKw.pm')
-rw-r--r-- lib/PublicInbox/LeiImportKw.pm 55
1 files changed, 55 insertions, 0 deletions
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
new file mode 100644
index 00000000..2878cbdf
--- /dev/null
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -0,0 +1,55 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# WQ worker for dealing with LeiImport IMAP flags on already-imported messages
+# WQ key: {ikw}
+package PublicInbox::LeiImportKw;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+
+sub new { # constructor: starts workers via $lei->workers_start and registers $self as $lei->{ikw}
+        my ($cls, $lei) = @_;
+        my $self = bless { -wq_ident => 'lei import_kw worker' }, $cls;
+        my ($op_c, $ops) = $lei->workers_start($self, $self->detect_nproc); # worker count comes from detect_nproc
+        $op_c->{ops} = $ops; # for PktOp->event_step
+        $lei->{ikw} = $self; # last expression, so $self is also the return value
+}
+
+sub ipc_atfork_child { # caches sto/lse/over/-lms_ro on $self; presumably run in each forked worker -- confirm in PublicInbox::IPC
+        my ($self) = @_;
+        my $lei = $self->{lei};
+        $lei->_lei_atfork_child;
+        my $net = delete $lei->{net} // die 'BUG: no lei->{net}'; # {net} is removed from $lei; only its -lms_ro is kept below
+        $self->{sto} = $lei->{sto} // die 'BUG: no lei->{sto}';
+        $self->{verbose} = $lei->{opt}->{verbose};
+        $self->{lse} = $self->{sto}->search;
+        $self->{over} = $self->{lse}->over;
+        $self->{-lms_ro} = $net->{-lms_ro} || die 'BUG: net->{-lms_ro} FALSE'; # || rather than //: any false value dies, not just undef
+        $self->SUPER::ipc_atfork_child; # chain to the parent class hook last
+}
+
+sub ck_update_kw { # via wq_io_do; applies keywords $kw to docids matching the OID looked up for ($url, $uid)
+        my ($self, $url, $uid, $kw) = @_;
+        my $oidbin = $self->{-lms_ro}->imap_oidbin($url, $uid) // return; # undef lookup result: nothing to do
+        my @docids = $self->{over}->oidbin_exists($oidbin) or return; # list assignment count is tested: return when no docids
+        $self->{lse}->kw_changed(undef, $kw, \@docids) or return; # false result: return without calling into sto
+        $self->{verbose} and # verbose only: report hex OID and the keywords via qerr
+                $self->{lei}->qerr('# '.unpack('H*', $oidbin)." => @$kw\n");
+        $self->{sto}->ipc_do('set_eml_vmd', undef, { kw => $kw }, \@docids); # hand the keyword update to sto
+}
+
+sub ikw_done_wait { # callback handed to wq_wait_old by _lei_wq_eof; plain function, not a method
+        my ($arg, $pid) = @_;
+        my ($self, $lei) = @$arg; # $arg is an array ref whose first two elements are used as ($self, $lei)
+        my $wait = $lei->{sto}->ipc_do('done'); # $wait is never read; NOTE(review): presumably forces a synchronous ipc_do -- confirm
+        $lei->can('wq_done_wait')->($arg, $pid); # resolve wq_done_wait via $lei, call it as a function with the same args
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+        my ($lei) = @_;
+        my $ikw = delete $lei->{ikw} or return $lei->fail; # {ikw} is removed from $lei; missing/false => return $lei->fail
+        $ikw->wq_wait_old(\&ikw_done_wait, $lei); # ikw_done_wait unpacks ($self, $lei) from its first arg -- presumably built by wq_wait_old
+}
+
+1;