about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/Inbox.pm19
-rw-r--r--lib/PublicInbox/Search.pm84
-rw-r--r--lib/PublicInbox/SearchIdx.pm50
-rw-r--r--lib/PublicInbox/WWW.pm7
4 files changed, 99 insertions, 61 deletions
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index dc186b73..813ed997 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -25,7 +25,7 @@ sub cleanup_task () {
         my $next = {};
         for my $ibx (values %$CLEANUP) {
                 my $again;
-                foreach my $f (qw(mm search)) {
+                foreach my $f (qw(mm search over)) {
                         delete $ibx->{$f} if SvREFCNT($ibx->{$f}) == 1;
                 }
                 my $expire = time - 60;
@@ -37,7 +37,7 @@ sub cleanup_task () {
                                 $again = 1 if $git->cleanup($expire);
                         }
                 }
-                $again ||= !!($ibx->{mm} || $ibx->{search});
+                $again ||= !!($ibx->{over} || $ibx->{mm} || $ibx->{search});
                 $next->{"$ibx"} = $ibx if $again;
         }
         $CLEANUP = $next;
@@ -175,14 +175,17 @@ sub search ($;$) {
                 require PublicInbox::Search;
                 PublicInbox::Search->new($self, $self->{altid});
         };
-        # TODO: lazily load Xapian
-        # return $srch if $over_only || eval { $srch->xdb };
-        # undef;
+        ($over_only || eval { $srch->xdb }) ? $srch : undef;
 }
 
 sub over ($) {
-        my $srch = search($_[0], 1) or return;
-        $srch->{over_ro};
+        my ($self) = @_;
+        my $srch = search($self, 1) or return;
+        $self->{over} ||= eval {
+                my $over = $srch->{over_ro};
+                $over->dbh_new; # may fail
+                $over;
+        }
 }
 
 sub try_cat {
@@ -290,7 +293,7 @@ sub nntp_url {
 sub nntp_usable {
         my ($self) = @_;
         my $ret = mm($self) && over($self);
-        $self->{mm} = $self->{search} = undef;
+        $self->{mm} = $self->{over} = $self->{search} = undef;
         $ret;
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e79ec0f8..b1e62f4c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -12,11 +12,21 @@ use constant TS => 0;  # Received: header in Unix time
 use constant YYYYMMDD => 1; # Date: header for searching in the WWW UI
 use constant DT => 2; # Date: YYYYMMDDHHMMSS
 
-use Search::Xapian qw/:standard/;
 use PublicInbox::SearchMsg;
 use PublicInbox::MIME;
 use PublicInbox::MID qw/id_compress/;
 use PublicInbox::Over;
+my $QP_FLAGS;
+sub load_xapian () {
+        $QP_FLAGS ||= eval {
+                require Search::Xapian;
+                Search::Xapian->import(qw(:standard));
+
+                # n.b. FLAG_PURE_NOT is expensive not suitable for a public
+                # website as it could become a denial-of-service vector
+                FLAG_PHRASE()|FLAG_BOOLEAN()|FLAG_LOVEHATE()|FLAG_WILDCARD();
+        };
+};
 
 # This is English-only, everything else is non-standard and may be confused as
 # a prefix common in patch emails
@@ -41,10 +51,6 @@ use constant {
         #      (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0)
         # 14 - fix ghost root vivification
         SCHEMA_VERSION => 15,
-
-        # n.b. FLAG_PURE_NOT is expensive not suitable for a public website
-        # as it could become a denial-of-service vector
-        QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
 };
 
 my %bool_pfx_external = (
@@ -113,18 +119,43 @@ EOF
 );
 chomp @HELP;
 
-sub xdir {
-        my ($self) = @_;
+sub xdir ($;$) {
+        my ($self, $rdonly) = @_;
         if ($self->{version} == 1) {
                 "$self->{mainrepo}/public-inbox/xapian" . SCHEMA_VERSION;
         } else {
                 my $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION;
+                return $dir if $rdonly;
+
                 my $part = $self->{partition};
                 defined $part or die "partition not given";
                 $dir .= "/$part";
         }
 }
 
+sub xdb ($) {
+        my ($self) = @_;
+        $self->{xdb} ||= do {
+                load_xapian();
+                my $dir = xdir($self, 1);
+                if ($self->{version} >= 2) {
+                        my $xdb;
+                        foreach my $part (<$dir/*>) {
+                                -d $part && $part =~ m!/\d+\z! or next;
+                                my $sub = Search::Xapian::Database->new($part);
+                                if ($xdb) {
+                                        $xdb->add_database($sub);
+                                } else {
+                                        $xdb = $sub;
+                                }
+                        }
+                        $xdb;
+                } else {
+                        Search::Xapian::Database->new($dir);
+                }
+        };
+}
+
 sub new {
         my ($class, $mainrepo, $altid) = @_;
         my $version = 1;
@@ -138,33 +169,16 @@ sub new {
                 altid => $altid,
                 version => $version,
         }, $class;
-        my $dir;
-        if ($version >= 2) {
-                $dir = "$self->{mainrepo}/xap" . SCHEMA_VERSION;
-                my $xdb;
-                my $parts = 0;
-                foreach my $part (<$dir/*>) {
-                        -d $part && $part =~ m!/\d+\z! or next;
-                        $parts++;
-                        my $sub = Search::Xapian::Database->new($part);
-                        if ($xdb) {
-                                $xdb->add_database($sub);
-                        } else {
-                                $xdb = $sub;
-                        }
-                }
-                $self->{xdb} = $xdb;
-        } else {
-                $dir = $self->xdir;
-                $self->{xdb} = Search::Xapian::Database->new($dir);
-        }
+        my $dir = xdir($self, 1);
         $self->{over_ro} = PublicInbox::Over->new("$dir/over.sqlite3");
         $self;
 }
 
 sub reopen {
         my ($self) = @_;
-        $self->{xdb}->reopen;
+        if (my $xdb = $self->{xdb}) {
+                $xdb->reopen;
+        }
         $self; # make chaining easier
 }
 
@@ -175,7 +189,8 @@ sub query {
         if ($query_string eq '' && !$opts->{mset}) {
                 $self->{over_ro}->recent($opts);
         } else {
-                my $query = $self->qp->parse_query($query_string, QP_FLAGS);
+                my $qp = qp($self);
+                my $query = $qp->parse_query($query_string, $QP_FLAGS);
                 $opts->{relevance} = 1 unless exists $opts->{relevance};
                 _do_enquire($self, $query, $opts);
         }
@@ -213,7 +228,8 @@ sub _do_enquire {
 
 sub _enquire_once {
         my ($self, $query, $opts) = @_;
-        my $enquire = Search::Xapian::Enquire->new($self->{xdb});
+        my $xdb = xdb($self);
+        my $enquire = Search::Xapian::Enquire->new($xdb);
         $enquire->set_query($query);
         $opts ||= {};
         my $desc = !$opts->{asc};
@@ -246,13 +262,13 @@ sub qp {
 
         my $qp = $self->{query_parser};
         return $qp if $qp;
-
+        my $xdb = xdb($self);
         # new parser
         $qp = Search::Xapian::QueryParser->new;
-        $qp->set_default_op(OP_AND);
-        $qp->set_database($self->{xdb});
+        $qp->set_default_op(OP_AND());
+        $qp->set_database($xdb);
         $qp->set_stemmer($self->stemmer);
-        $qp->set_stemming_strategy(STEM_SOME);
+        $qp->set_stemming_strategy(STEM_SOME());
         $qp->set_max_wildcard_expansion(100);
         $qp->add_valuerangeprocessor(
                 Search::Xapian::NumberValueRangeProcessor->new(YYYYMMDD, 'd:'));
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1b86f727..135b5eb9 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -54,11 +54,10 @@ sub new {
                                 die("Invalid indexlevel $ibx->{indexlevel}\n");
                         }
                 }
-        } else { # v1
+        } else { # FIXME: old tests: old tests
                 $ibx = { mainrepo => $git_dir, version => 1 };
         }
         $ibx = PublicInbox::InboxWritable->new($ibx);
-        require Search::Xapian::WritableDatabase;
         my $self = bless {
                 mainrepo => $mainrepo,
                 -inbox => $ibx,
@@ -84,25 +83,36 @@ sub new {
         $self;
 }
 
+sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels }
+
 sub _xdb_release {
         my ($self) = @_;
-        my $xdb = delete $self->{xdb} or croak 'not acquired';
-        $xdb->close;
+        if (need_xapian($self)) {
+                my $xdb = delete $self->{xdb} or croak 'not acquired';
+                $xdb->close;
+        }
         $self->lock_release if $self->{creat};
         undef;
 }
 
 sub _xdb_acquire {
         my ($self) = @_;
-        croak 'already acquired' if $self->{xdb};
+        my $flag;
         my $dir = $self->xdir;
-        my $flag = Search::Xapian::DB_OPEN;
+        if (need_xapian($self)) {
+                croak 'already acquired' if $self->{xdb};
+                PublicInbox::Search::load_xapian();
+                require Search::Xapian::WritableDatabase;
+                $flag = $self->{creat} ?
+                        Search::Xapian::DB_CREATE_OR_OPEN() :
+                        Search::Xapian::DB_OPEN();
+        }
         if ($self->{creat}) {
                 require File::Path;
                 $self->lock_acquire;
                 File::Path::mkpath($dir);
-                $flag = Search::Xapian::DB_CREATE_OR_OPEN;
         }
+        return unless defined $flag;
         $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
 }
 
@@ -342,7 +352,7 @@ sub add_message {
                 $num = index_mm($self, $mime);
         }
         eval {
-                if ($self->{indexlevel} =~ $xapianlevels) {
+                if (need_xapian($self)) {
                         $self->add_xapian($mime, $num, $oid, $mids, $mid0)
                 }
                 if (my $over = $self->{over}) {
@@ -383,7 +393,6 @@ sub batch_do {
 # v1 only, where $mid is unique
 sub remove_message {
         my ($self, $mid) = @_;
-        my $db = $self->{xdb};
         $mid = mid_clean($mid);
 
         if (my $over = $self->{over}) {
@@ -394,7 +403,8 @@ sub remove_message {
                         warn "<$mid> missing for removal from overview\n";
                 }
         }
-        return if $self->{indexlevel} !~ $xapianlevels;
+        return unless need_xapian($self);
+        my $db = $self->{xdb};
         my $nr = 0;
         eval {
                 batch_do($self, 'Q' . $mid, sub {
@@ -413,10 +423,12 @@ sub remove_message {
 # MID is a hint in V2
 sub remove_by_oid {
         my ($self, $oid, $mid) = @_;
-        my $db = $self->{xdb};
 
         $self->{over}->remove_oid($oid, $mid) if $self->{over};
 
+        return unless need_xapian($self);
+        my $db = $self->{xdb};
+
         # XXX careful, we cannot use batch_do here since we conditionally
         # delete documents based on other factors, so we cannot call
         # find_doc_ids twice.
@@ -664,7 +676,7 @@ sub _last_x_commit {
         my ($self, $mm) = @_;
         my $lm = $mm->last_commit || '';
         my $lx = '';
-        if ($self->{indexlevel} =~ $xapianlevels) {
+        if (need_xapian($self)) {
                 $lx = $self->{xdb}->get_metadata('last_commit') || '';
         } else {
                 $lx = $lm;
@@ -695,7 +707,7 @@ sub _index_sync {
                 $self->{over}->disconnect;
                 $git->cleanup;
                 delete $self->{txn};
-                $xdb->cancel_transaction;
+                $xdb->cancel_transaction if $xdb;
                 $xdb = _xdb_release($self);
 
                 # ensure we leak no FDs to "git log" with Xapian <= 1.2
@@ -717,7 +729,7 @@ sub _index_sync {
                         }
                         $dbh->commit;
                 }
-                if ($newest && $self->{indexlevel} =~ $xapianlevels) {
+                if ($newest && need_xapian($self)) {
                         my $cur = $xdb->get_metadata('last_commit');
                         if (need_update($self, $cur, $newest)) {
                                 $xdb->set_metadata('last_commit', $newest);
@@ -785,7 +797,7 @@ sub begin_txn_lazy {
         $self->{-inbox}->with_umask(sub {
                 my $xdb = $self->{xdb} || $self->_xdb_acquire;
                 $self->{over}->begin_lazy if $self->{over};
-                $xdb->begin_transaction;
+                $xdb->begin_transaction if $xdb;
                 $self->{txn} = 1;
                 $xdb;
         });
@@ -795,14 +807,18 @@ sub commit_txn_lazy {
         my ($self) = @_;
         delete $self->{txn} or return;
         $self->{-inbox}->with_umask(sub {
-                $self->{xdb}->commit_transaction;
+                if (my $xdb = $self->{xdb}) {
+                        $xdb->commit_transaction;
+                }
                 $self->{over}->commit_lazy if $self->{over};
         });
 }
 
 sub worker_done {
         my ($self) = @_;
-        die "$$ $0 xdb not released\n" if $self->{xdb};
+        if (need_xapian($self)) {
+                die "$$ $0 xdb not released\n" if $self->{xdb};
+        }
         die "$$ $0 still in transaction\n" if $self->{txn};
 }
 
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 0f963dcb..8e1b1afe 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -149,8 +149,11 @@ sub preload {
         require PublicInbox::MIME;
         require Digest::SHA;
         require POSIX;
-
-        foreach (qw(PublicInbox::Search PublicInbox::SearchView
+        eval {
+                require PublicInbox::Search;
+                PublicInbox::Search::load_xapian();
+        };
+        foreach (qw(PublicInbox::SearchView
                         PublicInbox::Mbox IO::Compress::Gzip
                         PublicInbox::NewsWWW)) {
                 eval "require $_;";