From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2E0A41F61A for ; Wed, 17 Aug 2022 09:33:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1660728798; bh=ogtG5LvOY/8sMQxWYR7mbTUhU8nA9KQaNIR6LspXFkk=; h=From:To:Subject:Date:In-Reply-To:References:From; b=PXrbODA3PaOLCr+apoxmjQlfmWktrFRL4ehj79ADK7qqRLCGqvpUwCy/7BXgqlTz+ wOvriBjgTdiL6PAnyKkyZX/uoZgaj1ZKZf+KkvYaLnHgQHRIjq6rrkHC0GGHRBLUs4 f2uunUii6iCKlmUyXNOL2HTXfweQ3Q77WvF53jyA= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/4] lei reindex: new command to reindex lei/store Date: Wed, 17 Aug 2022 09:33:17 +0000 Message-Id: <20220817093317.3820774-5-e@80x24.org> In-Reply-To: <20220817093317.3820774-1-e@80x24.org> References: <20220817093317.3820774-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: --- Documentation/lei-reindex.pod | 47 +++++++++++++++++++++++++++++++++ MANIFEST | 2 ++ lib/PublicInbox/LEI.pm | 2 ++ lib/PublicInbox/LeiReindex.pm | 49 +++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiStore.pm | 32 ++++++++++++++++++++++- 5 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 Documentation/lei-reindex.pod create mode 100644 lib/PublicInbox/LeiReindex.pm diff --git a/Documentation/lei-reindex.pod b/Documentation/lei-reindex.pod new file mode 100644 index 00000000..3a5861c4 --- /dev/null +++ b/Documentation/lei-reindex.pod @@ -0,0 +1,47 @@ +=head1 NAME + +lei-reindex - reindex messages already in lei/store + +=head1 SYNOPSIS + +lei reindex [OPTIONS] + +=head1 DESCRIPTION + +Forces a re-index of all messages 
previously-indexed by L +or L. This can be used for in-place upgrades and bugfixes +while other processes are querying the store. Keep in mind this roughly +doubles the size of the already-large Xapian database. + +It does not re-index messages in externals; using the C<--reindex> +switch of L or L is +needed for that. + +=head1 OPTIONS + +=over + +=item -q + +=item --quiet + +Suppress feedback messages. + +=back + +=head1 CONTACT + +Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> + +The mail archives are hosted at L and +L + +=head1 COPYRIGHT + +Copyright all contributors L + +License: AGPL-3.0+ L + +=head1 SEE ALSO + +L, L diff --git a/MANIFEST b/MANIFEST index cc0a9a4c..27e4c4e0 100644 --- a/MANIFEST +++ b/MANIFEST @@ -56,6 +56,7 @@ Documentation/lei-p2q.pod Documentation/lei-q.pod Documentation/lei-rediff.pod Documentation/lei-refresh-mail-sync.pod +Documentation/lei-reindex.pod Documentation/lei-rm-watch.pod Documentation/lei-rm.pod Documentation/lei-security.pod @@ -256,6 +257,7 @@ lib/PublicInbox/LeiPmdir.pm lib/PublicInbox/LeiQuery.pm lib/PublicInbox/LeiRediff.pm lib/PublicInbox/LeiRefreshMailSync.pm +lib/PublicInbox/LeiReindex.pm lib/PublicInbox/LeiRemote.pm lib/PublicInbox/LeiRm.pm lib/PublicInbox/LeiRmWatch.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 595b3fa9..8a3a3ab6 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -253,6 +253,8 @@ our %CMD = ( # sorted in order of importance/use: 'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', qw(prune), @c_opt ], +'reindex' => [ '', 'reindex all locally-indexed messages', @c_opt ], + 'index' => [ 'LOCATION...', 'one-time index from URL or filesystem', qw(in-format|F=s kw! 
offset=i recursive|r exclude=s include|I=s verbose|v+ incremental!), @net_opt, # mainly for --proxy= diff --git a/lib/PublicInbox/LeiReindex.pm b/lib/PublicInbox/LeiReindex.pm new file mode 100644 index 00000000..3f109f33 --- /dev/null +++ b/lib/PublicInbox/LeiReindex.pm @@ -0,0 +1,49 @@ +# Copyright all contributors +# License: AGPL-3.0+ + +# "lei reindex" command to reindex everything in lei/store +package PublicInbox::LeiReindex; +use v5.12; +use parent qw(PublicInbox::IPC); + +sub reindex_full { + my ($lei) = @_; + my $sto = $lei->{sto}; + my $max = $sto->search->over(1)->max; + $lei->qerr("# reindexing 1..$max"); + $sto->wq_do('reindex_art', $_) for (1..$max); +} + +sub reindex_store { # via wq_do + my ($self) = @_; + my ($lei, $argv) = delete @$self{qw(lei argv)}; + if (!@$argv) { + reindex_full($lei); + } +} + +sub lei_reindex { + my ($lei, @argv) = @_; + my $sto = $lei->_lei_store or return $lei->fail('nothing indexed'); + $sto->write_prepare($lei); + my $self = bless { lei => $lei, argv => \@argv }, __PACKAGE__; + my ($op_c, $ops) = $lei->workers_start($self, 1); + $lei->{wq1} = $self; + $lei->wait_wq_events($op_c, $ops); + $self->wq_do('reindex_store'); + $self->wq_close; +} + +sub _lei_wq_eof { # EOF callback for main lei daemon + my ($lei) = @_; + $lei->{sto}->wq_do('reindex_done'); + $lei->wq_eof; +} + +sub ipc_atfork_child { + my ($self) = @_; + $self->{lei}->_lei_atfork_child; + $self->SUPER::ipc_atfork_child; +} + +1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index d49746cb..277ed6bd 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # Local storage (cache/memo) for lei(1), suitable for personal/private @@ -335,6 +335,36 @@ sub _docids_and_maybe_kw ($$) { ($docids, [ sort keys %$kw ]); } +sub _reindex_1 { # git->cat_async callback + my ($bref, $hex, $type, $size, $smsg) = @_; + my 
($self, $eidx, $tl) = delete @$smsg{qw(-self -eidx -tl)}; + $bref //= _lms_rw($self)->local_blob($hex, 1); + if ($bref) { + my $eml = PublicInbox::Eml->new($bref); + $smsg->{-merge_vmd} = 1; # preserve existing keywords + $eidx->idx_shard($smsg->{num})->index_eml($eml, $smsg); + } else { + warn("E: $type $hex\n"); + } +} + +sub reindex_art { + my ($self, $art) = @_; + my ($eidx, $tl) = eidx_init($self); + my $smsg = $eidx->{oidx}->get_art($art) // return; + return if $smsg->{bytes} == 0; # external-only message + @$smsg{qw(-self -eidx -tl)} = ($self, $eidx, $tl); + $eidx->git->cat_async($smsg->{blob} // die("no blob (#$art)"), + \&_reindex_1, $smsg); +} + +sub reindex_done { + my ($self) = @_; + my ($eidx, $tl) = eidx_init($self); + $eidx->git->async_wait_all; + # ->done to be called via sto_done_request +} + sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->{-fake_im} // $self->importer; # may create new epoch