From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E68351F87F for ; Wed, 29 May 2019 20:56:32 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] searchidx: store indexlevel=medium as metadata Date: Wed, 29 May 2019 20:56:32 +0000 Message-Id: <20190529205632.18760-3-e@80x24.org> In-Reply-To: <20190529205632.18760-1-e@80x24.org> References: <20190529205632.18760-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: And use it from Admin. It's easy to detect indexlevel=basic in unconfigured inboxes, but distinguishing between 'medium' and 'full' would require stat()-ing position.* files, which is fragile and Xapian-implementation-dependent. So use the metadata facility of Xapian and store it in the main partition so Admin tools can deal better with unconfigured inboxes copied using generic tools like cp(1) or rsync(1). 
--- lib/PublicInbox/Admin.pm | 25 +++++++++++++++++++++++++ lib/PublicInbox/SearchIdx.pm | 8 ++++++++ lib/PublicInbox/Xapcmd.pm | 8 ++++++++ script/public-inbox-index | 7 +++---- t/indexlevels-mirror.t | 14 +++++++++++--- 5 files changed, 55 insertions(+), 7 deletions(-) diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 07d8b57..4a862c6 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -41,6 +41,31 @@ sub resolve_repo_dir { } } +# for unconfigured inboxes +sub detect_indexlevel ($) { + my ($ibx) = @_; + + # brand new or never before indexed inboxes default to full + return 'full' unless $ibx->over; + delete $ibx->{over}; # don't leave open FD lying around + + my $l = 'basic'; + my $srch = $ibx->search or return $l; + delete $ibx->{search}; # don't leave open FD lying around + if (my $xdb = $srch->xdb) { + $l = 'full'; + my $m = $xdb->get_metadata('indexlevel'); + if ($m eq 'medium') { + $l = $m; + } elsif ($m ne '') { + warn <<""; +$ibx->{mainrepo} has unexpected indexlevel in Xapian: $m + + } + } + $l; +} + sub resolve_inboxes { my ($argv, $warn_on_unconfigured) = @_; require PublicInbox::Config; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index b963805..9985628 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -828,6 +828,14 @@ sub commit_txn_lazy { delete $self->{txn} or return; $self->{-inbox}->with_umask(sub { if (my $xdb = $self->{xdb}) { + + # store 'indexlevel=medium' in v2 part=0 and v1 (only part) + # This metadata is read by Admin::detect_indexlevel: + if (!$self->{partition} # undef or 0, not >0 + && $self->{indexlevel} eq 'medium') { + $xdb->set_metadata('indexlevel', 'medium'); + } + $xdb->commit_transaction; } $self->{over}->commit_lazy if $self->{over}; diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 7e3d47f..9067231 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -275,6 +275,14 @@ sub cpdb ($$) { 
my $lc = $src->get_metadata('last_commit'); $dst->set_metadata('last_commit', $lc) if $lc; + # only the first xapian partition (0) gets 'indexlevel' + if ($old =~ m!(?:xapian\d+|xap\d+/0)\z!) { + my $l = $src->get_metadata('indexlevel'); + if ($l eq 'medium') { + $dst->set_metadata('indexlevel', $l); + } + } + $it = $src->postlist_begin(''); $end = $src->postlist_end(''); if ($pr) { diff --git a/script/public-inbox-index b/script/public-inbox-index index e4a7be1..439da15 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -25,10 +25,9 @@ PublicInbox::Admin::require_or_die('-index'); usage() unless @ibxs; my $mods = {}; foreach my $ibx (@ibxs) { - if (defined $opt->{indexlevel} && !defined($ibx->{indexlevel})) { - # XXX: users can shoot themselves in the foot, with this... - $ibx->{indexlevel} = $opt->{indexlevel}; - } + # XXX: users can shoot themselves in the foot, with opt->{indexlevel} + $ibx->{indexlevel} //= $opt->{indexlevel} // + PublicInbox::Admin::detect_indexlevel($ibx); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); } diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 1251136..bf0f801 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -7,6 +7,7 @@ use PublicInbox::MIME; use PublicInbox::Inbox; use PublicInbox::InboxWritable; use File::Temp qw/tempdir/; +require PublicInbox::Admin; require './t/common.perl'; require_git(2.6); my $this = (split('/', __FILE__))[-1]; @@ -119,6 +120,8 @@ sub import_index_incremental { if ($level ne 'basic') { is(system(@xcpdb, $mirror), 0, "v$v xcpdb OK"); + is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, + 'indexlevel detectable by Admin after xcpdb v' .$v.$level); delete $ro_mirror->{$_} for (qw(over search)); ($nr, $msgs) = $ro_mirror->search->query('m:m@2'); is($nr, 1, "v$v found m\@2 via Xapian on $level"); @@ -157,6 +160,9 @@ sub import_index_incremental { @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, \@expect, 
"v$v master has expected NNTP articles"); is_deeply(\@ro_nums, \@expect, "v$v mirror matches master articles"); + + is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, + 'indexlevel detectable by Admin '.$v.$level); } # we can probably cull some other tests and put full/medium tests, here @@ -172,9 +178,11 @@ SKIP: { require PublicInbox::Search; PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 2; for my $v (1..2) { - subtest("v$v indexlevel=medium" => sub { - import_index_incremental($v, 'medium'); - }) + foreach my $l (qw(medium full)) { + subtest("v$v indexlevel=$l" => sub { + import_index_incremental($v, $l); + }); + } } } -- EW