about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-12-22 22:17:39 +0000
committerEric Wong <e@80x24.org>2019-12-24 03:48:12 +0000
commitcd8dd7b08fddc7c2b5f218c3fcaa5dca5f9ad945 (patch)
tree0989e4981be20e2408dda31314da019bd1841647 /lib
parent9484944697a1bf76d00d84d081e6a7d633074336 (diff)
downloadpublic-inbox-cd8dd7b08fddc7c2b5f218c3fcaa5dca5f9ad945.tar.gz
Xapian upstream is slowly phasing out the XS-based Search::Xapian
in favor of the SWIG-generated "Xapian" package.  While Debian and
both FreeBSD have Search::Xapian, OpenBSD only includes the "Xapian"
binding.

More information about the status of the "Xapian" Perl module here:
https://trac.xapian.org/ticket/523
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/Admin.pm4
-rw-r--r--lib/PublicInbox/Search.pm57
-rw-r--r--lib/PublicInbox/SearchIdx.pm32
-rw-r--r--lib/PublicInbox/SearchMsg.pm3
-rw-r--r--lib/PublicInbox/TestCommon.pm10
-rw-r--r--lib/PublicInbox/V2Writable.pm5
-rw-r--r--lib/PublicInbox/Xapcmd.pm25
7 files changed, 95 insertions, 41 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 3d0d80b9..32a9f65e 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -159,6 +159,10 @@ sub check_require {
         while (my $mod = shift @mods) {
                 if (my $groups = $mod_groups{$mod}) {
                         push @mods, @$groups;
+                } elsif ($mod eq 'Search::Xapian') {
+                        require PublicInbox::Search;
+                        PublicInbox::Search::load_xapian() or
+                                $err->{'Search::Xapian || Xapian'} = $@;
                 } else {
                         eval "require $mod";
                         $err->{$mod} = $@ if $@;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 88c8dc70..65c6ee83 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -17,20 +17,42 @@ use PublicInbox::MIME;
 use PublicInbox::MID qw/id_compress/;
 use PublicInbox::Over;
 my $QP_FLAGS;
+our %X = map { $_ => 0 } qw(BoolWeight Database Enquire
+                        NumberValueRangeProcessor QueryParser Stem);
+our $Xap; # 'Search::Xapian' or 'Xapian'
+my $ENQ_ASCENDING;
+
 sub load_xapian () {
-        $QP_FLAGS ||= eval {
-                require Search::Xapian;
-                Search::Xapian->import(qw(:standard));
+        return 1 if defined $Xap;
+        for my $x (qw(Search::Xapian Xapian)) {
+                eval "require $x";
+                next if $@;
+
+                $x->import(qw(:standard));
+                $Xap = $x;
+                $X{$_} = $Xap.'::'.$_ for (keys %X);
 
+                # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm,
+                # so lets hope this part of the ABI is stable because it's
+                # just an integer:
+                $ENQ_ASCENDING = $x eq 'Xapian' ?
+                                1 : Search::Xapian::ENQ_ASCENDING();
+
+                # for SearchMsg:
+                *PublicInbox::SearchMsg::sortable_unserialise =
+                                                $Xap.'::sortable_unserialise';
                 # n.b. FLAG_PURE_NOT is expensive not suitable for a public
                 # website as it could become a denial-of-service vector
                 # FLAG_PHRASE also seems to cause performance problems chert
                 # (and probably earlier Xapian DBs).  glass seems fine...
                 # TODO: make this an option, maybe?
                 # or make indexlevel=medium as default
-                FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD();
-        };
-};
+                $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() |
+                                FLAG_WILDCARD();
+                last;
+        }
+        undef;
+}
 
 # This is English-only, everything else is non-standard and may be confused as
 # a prefix common in patch emails
@@ -148,7 +170,7 @@ sub _xdb ($) {
         if ($self->{version} >= 2) {
                 foreach my $shard (<$dir/*>) {
                         -d $shard && $shard =~ m!/[0-9]+\z! or next;
-                        my $sub = Search::Xapian::Database->new($shard);
+                        my $sub = $X{Database}->new($shard);
                         if ($xdb) {
                                 $xdb->add_database($sub);
                         } else {
@@ -158,7 +180,7 @@ sub _xdb ($) {
                 }
         } else {
                 $slow_phrase = -f "$dir/iamchert";
-                $xdb = Search::Xapian::Database->new($dir);
+                $xdb = $X{Database}->new($dir);
         }
         $$qpf |= FLAG_PHRASE() unless $slow_phrase;
         $xdb;
@@ -222,7 +244,7 @@ sub retry_reopen {
                 }
                 # Exception: The revision being read has been discarded -
                 # you should call Xapian::Database::reopen()
-                if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
+                if (ref($@) =~ /\bDatabaseModifiedError\b/) {
                         warn "reopen try #$i on $@\n";
                         reopen($self);
                 } else {
@@ -243,13 +265,13 @@ sub _do_enquire {
 sub _enquire_once {
         my ($self, $query, $opts) = @_;
         my $xdb = xdb($self);
-        my $enquire = Search::Xapian::Enquire->new($xdb);
+        my $enquire = $X{Enquire}->new($xdb);
         $enquire->set_query($query);
         $opts ||= {};
         my $desc = !$opts->{asc};
         if (($opts->{mset} || 0) == 2) {
-                $enquire->set_docid_order(Search::Xapian::ENQ_ASCENDING());
-                $enquire->set_weighting_scheme(Search::Xapian::BoolWeight->new);
+                $enquire->set_docid_order($ENQ_ASCENDING);
+                $enquire->set_weighting_scheme($X{BoolWeight}->new);
         } elsif ($opts->{relevance}) {
                 $enquire->set_sort_by_relevance_then_value(TS, $desc);
         } else {
@@ -268,7 +290,7 @@ sub _enquire_once {
 }
 
 # read-write
-sub stemmer { Search::Xapian::Stem->new($LANG) }
+sub stemmer { $X{Stem}->new($LANG) }
 
 # read-only
 sub qp {
@@ -278,16 +300,15 @@ sub qp {
         return $qp if $qp;
         my $xdb = xdb($self);
         # new parser
-        $qp = Search::Xapian::QueryParser->new;
+        $qp = $X{QueryParser}->new;
         $qp->set_default_op(OP_AND());
         $qp->set_database($xdb);
         $qp->set_stemmer($self->stemmer);
         $qp->set_stemming_strategy(STEM_SOME());
         $qp->set_max_wildcard_expansion(100);
-        $qp->add_valuerangeprocessor(
-                Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:'));
-        $qp->add_valuerangeprocessor(
-                Search::Xapian::NumberValueRangeProcessor->new(DT, 'dt:'));
+        my $nvrp = $X{NumberValueRangeProcessor};
+        $qp->add_valuerangeprocessor($nvrp->new(YYYYMMDD, 'd:'));
+        $qp->add_valuerangeprocessor($nvrp->new(DT, 'dt:'));
 
         while (my ($name, $prefix) = each %bool_pfx_external) {
                 $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix);
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index b56fd0ee..21ab8119 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -19,7 +19,8 @@ use POSIX qw(strftime);
 use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::Git qw(git_unquote);
-
+my $X = \%PublicInbox::Search::X;
+my ($DB_CREATE_OR_OPEN, $DB_OPEN);
 use constant {
         BATCH_BYTES => defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
                         0x7fffffff : 1_000_000,
@@ -85,17 +86,28 @@ sub _xdb_release {
         undef;
 }
 
+sub load_xapian_writable () {
+        return 1 if $X->{WritableDatabase};
+        PublicInbox::Search::load_xapian() or return;
+        my $xap = $PublicInbox::Search::Xap;
+        for (qw(Document TermGenerator WritableDatabase)) {
+                $X->{$_} = $xap.'::'.$_;
+        }
+        eval 'require '.$X->{WritableDatabase} or die;
+        *sortable_serialise = $xap.'::sortable_serialise';
+        $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
+        $DB_OPEN = eval($xap.'::DB_OPEN()');
+        1;
+}
+
 sub _xdb_acquire {
         my ($self) = @_;
         my $flag;
         my $dir = $self->xdir;
         if (need_xapian($self)) {
                 croak 'already acquired' if $self->{xdb};
-                PublicInbox::Search::load_xapian();
-                require Search::Xapian::WritableDatabase;
-                $flag = $self->{creat} ?
-                        Search::Xapian::DB_CREATE_OR_OPEN() :
-                        Search::Xapian::DB_OPEN();
+                load_xapian_writable();
+                $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN;
         }
         if ($self->{creat}) {
                 require File::Path;
@@ -108,7 +120,7 @@ sub _xdb_acquire {
                 }
         }
         return unless defined $flag;
-        my $xdb = eval { Search::Xapian::WritableDatabase->new($dir, $flag) };
+        my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
         if ($@) {
                 die "Failed opening $dir: ", $@;
         }
@@ -117,7 +129,7 @@ sub _xdb_acquire {
 
 sub add_val ($$$) {
         my ($doc, $col, $num) = @_;
-        $num = Search::Xapian::sortable_serialise($num);
+        $num = sortable_serialise($num);
         $doc->add_value($col, $num);
 }
 
@@ -274,7 +286,7 @@ sub index_body ($$$) {
 sub add_xapian ($$$$$) {
         my ($self, $mime, $num, $oid, $mids, $mid0) = @_;
         my $smsg = PublicInbox::SearchMsg->new($mime);
-        my $doc = Search::Xapian::Document->new;
+        my $doc = $X->{Document}->new;
         my $subj = $smsg->subject;
         add_val($doc, PublicInbox::Search::TS(), $smsg->ts);
         my @ds = gmtime($smsg->ds);
@@ -458,7 +470,7 @@ sub term_generator { # write-only
         my $tg = $self->{term_generator};
         return $tg if $tg;
 
-        $tg = Search::Xapian::TermGenerator->new;
+        $tg = $X->{TermGenerator}->new;
         $tg->set_stemmer($self->stemmer);
 
         $self->{term_generator} = $tg;
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 7561e7f2..53882f73 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -27,7 +27,8 @@ sub wrap {
 
 sub get_val ($$) {
         my ($doc, $col) = @_;
-        Search::Xapian::sortable_unserialise($doc->get_value($col));
+        # sortable_unserialise is defined by PublicInbox::Search::load_xapian()
+        sortable_unserialise($doc->get_value($col));
 }
 
 sub to_doc_data {
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 9680b94c..b0b1f4d9 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -64,7 +64,15 @@ sub require_mods {
         my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
         my @need;
         for my $mod (@mods) {
-                eval "require $mod";
+                if ($mod eq 'Search::Xapian') {
+                        require PublicInbox::Search;
+                        PublicInbox::Search::load_xapian() and next;
+                } elsif ($mod eq 'Search::Xapian::WritableDatabase') {
+                        require PublicInbox::SearchIdx;
+                        PublicInbox::SearchIdx::load_xapian_writable() and next;
+                } else {
+                        eval "require $mod";
+                }
                 push @need, $mod if $@;
         }
         return unless @need;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index ab80941a..77b3bde4 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -62,10 +62,13 @@ sub count_shards ($) {
         # Also, shard count may change while -watch is running
         # due to "xcpdb --reshard"
         if (-d $xpfx) {
+                require PublicInbox::Search;
+                PublicInbox::Search::load_xapian();
+                my $XapianDatabase = $PublicInbox::Search::X{Database};
                 foreach my $shard (<$xpfx/*>) {
                         -d $shard && $shard =~ m!/[0-9]+\z! or next;
                         eval {
-                                Search::Xapian::Database->new($shard)->close;
+                                $XapianDatabase->new($shard)->close;
                                 $n++;
                         };
                 }
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 78b3a9ac..9f897dad 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PublicInbox::Spawn qw(which spawn);
 use PublicInbox::Over;
-use PublicInbox::Search;
+use PublicInbox::SearchIdx;
 use File::Temp ();
 use File::Path qw(remove_tree);
 use File::Basename qw(dirname);
@@ -99,7 +99,7 @@ sub prepare_reindex ($$$) {
         my ($ibx, $im, $reindex) = @_;
         if ($ibx->{version} == 1) {
                 my $dir = $ibx->search->xdir(1);
-                my $xdb = Search::Xapian::Database->new($dir);
+                my $xdb = $PublicInbox::Search::X{Database}->new($dir);
                 if (my $lc = $xdb->get_metadata('last_commit')) {
                         $reindex->{from} = $lc;
                 }
@@ -164,7 +164,8 @@ sub run {
         if (!$opt->{-coarse_lock}) {
                 $reindex = $opt->{reindex} = {};
                 $from = $reindex->{from} = [];
-                require Search::Xapian::WritableDatabase;
+                require PublicInbox::SearchIdx;
+                PublicInbox::SearchIdx::load_xapian_writable();
         }
 
         $ibx->umask_prepare;
@@ -251,7 +252,7 @@ sub run {
 
 sub cpdb_retryable ($$) {
         my ($src, $pfx) = @_;
-        if (ref($@) eq 'Search::Xapian::DatabaseModifiedError') {
+        if (ref($@) =~ /\bDatabaseModifiedError\b/) {
                 warn "$pfx Xapian DB modified, reopening and retrying\n";
                 $src->reopen;
                 return 1;
@@ -361,6 +362,8 @@ sub cpdb ($$) {
         my $new = $newdir->dirname;
         my ($src, $cur_shard);
         my $reshard;
+        PublicInbox::SearchIdx::load_xapian_writable() or die;
+        my $XapianDatabase = $PublicInbox::Search::X{Database};
         if (ref($old) eq 'ARRAY') {
                 ($cur_shard) = ($new =~ m!xap[0-9]+/([0-9]+)\b!);
                 defined $cur_shard or
@@ -371,14 +374,14 @@ sub cpdb ($$) {
                 # resharding, M:N copy means have full read access
                 foreach (@$old) {
                         if ($src) {
-                                my $sub = Search::Xapian::Database->new($_);
+                                my $sub = $XapianDatabase->new($_);
                                 $src->add_database($sub);
                         } else {
-                                $src = Search::Xapian::Database->new($_);
+                                $src = $XapianDatabase->new($_);
                         }
                 }
         } else {
-                $src = Search::Xapian::Database->new($old);
+                $src = $XapianDatabase->new($old);
         }
 
         my ($tmp, $ft);
@@ -395,8 +398,10 @@ sub cpdb ($$) {
 
         # like copydatabase(1), be sure we don't overwrite anything in case
         # of other bugs:
-        my $creat = Search::Xapian::DB_CREATE();
-        my $dst = Search::Xapian::WritableDatabase->new($tmp, $creat);
+        my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+        die if $@;
+        my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
+        my $dst = $XapianWritableDatabase->new($tmp, $creat);
         my $pr = $opt->{-progress};
         my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
         my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
@@ -439,7 +444,7 @@ sub cpdb ($$) {
                 # individually.
                 $src = undef;
                 foreach (@$old) {
-                        my $old = Search::Xapian::Database->new($_);
+                        my $old = $XapianDatabase->new($_);
                         cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard);
                 }
         } else {