about summary refs log tree commit homepage
path: root/lib/PublicInbox/Search.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2020-12-17 03:39:49 +0000
committerEric Wong <e@80x24.org>2020-12-19 09:32:08 +0000
commitba135f3e25bf5d1b3aa3d34e31fefb55ee4c8d29 (patch)
tree591c59df84844c8383c103f3b23b0dc9394e763e /lib/PublicInbox/Search.pm
parent475e6b6a722361223505a7fcb084f5e729c69240 (diff)
downloadpublic-inbox-ba135f3e25bf5d1b3aa3d34e31fefb55ee4c8d29.tar.gz
search: simplify initialization, add ->xdb_shards_flat
This reduces differences between v1 and v2 code, and
introduces ->xdb_shards_flat to provide read-only access
to shards without using Xapian::MultiDatabase.  This
will allow us to combine shards of several inboxes
AND extindexes for lei.
Diffstat (limited to 'lib/PublicInbox/Search.pm')
-rw-r--r--lib/PublicInbox/Search.pm65
1 files changed, 25 insertions, 40 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b1d38fb9..bbc5e32f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -191,41 +191,37 @@ sub xdir ($;$) {
         }
 }
 
-sub xdb_sharded {
+# returns all shards as separate Xapian::Database objects w/o combining
+sub xdb_shards_flat ($) {
         my ($self) = @_;
-        opendir(my $dh, $self->{xpfx}) or return; # not initialized yet
-
-        # We need numeric sorting so shard[0] is first for reading
-        # Xapian metadata, if needed
-        my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return;
+        my $xpfx = $self->{xpfx};
         my (@xdb, $slow_phrase);
-        for (0..$last) {
-                my $shard_dir = "$self->{xpfx}/$_";
-                if (-d $shard_dir && -r _) {
+        if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+                @xdb = ($X{Database}->new($xpfx));
+                $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
+        } else {
+                opendir(my $dh, $xpfx) or return (); # not initialized yet
+                # We need numeric sorting so shard[0] is first for reading
+                # Xapian metadata, if needed
+                my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return ();
+                for (0..$last) {
+                        my $shard_dir = "$self->{xpfx}/$_";
                         push @xdb, $X{Database}->new($shard_dir);
                         $slow_phrase ||= -f "$shard_dir/iamchert";
-                } else { # gaps from missing epochs throw off mdocid()
-                        warn "E: $shard_dir missing or unreadable\n";
-                        return;
                 }
+                $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
         }
-        $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase;
-        $self->{nshard} = scalar(@xdb);
-        my $xdb = shift @xdb;
-        $xdb->add_database($_) for @xdb;
-        $xdb;
+        @xdb;
 }
 
 sub _xdb {
         my ($self) = @_;
-        my $dir = xdir($self, 1);
         $self->{qp_flags} //= $QP_FLAGS;
-        if ($self->{ibx_ver} >= 2) {
-                xdb_sharded($self);
-        } else {
-                $self->{qp_flags} |= FLAG_PHRASE() if !-f "$dir/iamchert";
-                $X{Database}->new($dir);
-        }
+        my @xdb = xdb_shards_flat($self) or return;
+        $self->{nshard} = scalar(@xdb);
+        my $xdb = shift @xdb;
+        $xdb->add_database($_) for @xdb;
+        $xdb;
 }
 
 # v2 Xapian docids don't conflict, so they're identical to
@@ -239,7 +235,7 @@ sub mdocid {
 
 sub mset_to_artnums {
         my ($self, $mset) = @_;
-        my $nshard = $self->{nshard} // 1;
+        my $nshard = $self->{nshard};
         [ map { mdocid($nshard, $_) } $mset->items ];
 }
 
@@ -251,25 +247,14 @@ sub xdb ($) {
         };
 }
 
-sub xpfx_init ($) {
-        my ($self) = @_;
-        if ($self->{ibx_ver} == 1) {
-                $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION;
-        } else {
-                $self->{xpfx} .= '/xap'.SCHEMA_VERSION;
-        }
-}
-
 sub new {
         my ($class, $ibx) = @_;
         ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
-        my $self = bless {
-                xpfx => $ibx->{inboxdir}, # for xpfx_init
+        my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
+        bless {
+                xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION,
                 altid => $ibx->{altid},
-                ibx_ver => $ibx->version,
         }, $class;
-        xpfx_init($self);
-        $self;
 }
 
 sub reopen {
@@ -362,7 +347,7 @@ sub _enquire_once { # retry_reopen callback
 
 sub mset_to_smsg {
         my ($self, $ibx, $mset) = @_;
-        my $nshard = $self->{nshard} // 1;
+        my $nshard = $self->{nshard};
         my $i = 0;
         my %order = map { mdocid($nshard, $_) => ++$i } $mset->items;
         my @msgs = sort {