From ba135f3e25bf5d1b3aa3d34e31fefb55ee4c8d29 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2020 03:39:49 +0000 Subject: search: simplify initialization, add ->xdb_shards_flat This reduces differences between v1 and v2 code, and introduces ->xdb_shards_flat to provide read-only access to shards without using Xapian::MultiDatabase. This will allow us to combine shards of several inboxes AND extindexes for lei. --- lib/PublicInbox/Search.pm | 65 ++++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 40 deletions(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index b1d38fb9..bbc5e32f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -191,41 +191,37 @@ sub xdir ($;$) { } } -sub xdb_sharded { +# returns all shards as separate Xapian::Database objects w/o combining +sub xdb_shards_flat ($) { my ($self) = @_; - opendir(my $dh, $self->{xpfx}) or return; # not initialized yet - - # We need numeric sorting so shard[0] is first for reading - # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; + my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); - for (0..$last) { - my $shard_dir = "$self->{xpfx}/$_"; - if (-d $shard_dir && -r _) { + if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) { + @xdb = ($X{Database}->new($xpfx)); + $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; + } else { + opendir(my $dh, $xpfx) or return (); # not initialized yet + # We need numeric sorting so shard[0] is first for reading + # Xapian metadata, if needed + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return (); + for (0..$last) { + my $shard_dir = "$self->{xpfx}/$_"; push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; - } else { # gaps from missing epochs throw off mdocid() - warn "E: $shard_dir missing or unreadable\n"; - return; } + $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; } - $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; - $self->{nshard} = scalar(@xdb); - my $xdb = shift @xdb; - $xdb->add_database($_) for @xdb; - $xdb; + @xdb; } sub _xdb { my ($self) = @_; - my $dir = xdir($self, 1); $self->{qp_flags} //= $QP_FLAGS; - if ($self->{ibx_ver} >= 2) { - xdb_sharded($self); - } else { - $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert"; - $X{Database}->new($dir); - } + my @xdb = xdb_shards_flat($self) or return; + $self->{nshard} = scalar(@xdb); + my $xdb = shift @xdb; + $xdb->add_database($_) for @xdb; + $xdb; } # v2 Xapian docids don't conflict, so they're identical to @@ -239,7 +235,7 @@ sub mdocid { sub mset_to_artnums { my ($self, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; [ map { mdocid($nshard, $_) } $mset->items ]; } @@ -251,25 +247,14 @@ sub xdb ($) { }; } -sub xpfx_init ($) { - my ($self) = @_; - if ($self->{ibx_ver} == 1) { - $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION; - } else { - $self->{xpfx} .= '/xap'.SCHEMA_VERSION; - } -} - sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; - my $self = bless { - xpfx => $ibx->{inboxdir}, # for xpfx_init + my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian'; + bless { + xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION, altid => $ibx->{altid}, - ibx_ver => $ibx->version, }, $class; - xpfx_init($self); - $self; } sub reopen { @@ -362,7 +347,7 @@ sub _enquire_once { # retry_reopen callback sub mset_to_smsg { my ($self, $ibx, $mset) = @_; - my $nshard = $self->{nshard} // 1; + my $nshard = $self->{nshard}; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { -- cgit v1.2.3-24-ge0c7