about summary refs log tree commit homepage
path: root/lib/PublicInbox/Search.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-08-24 01:22:33 +0000
committer Eric Wong <e@80x24.org> 2023-08-24 07:47:51 +0000
commit b18ecb7707e83cb8cb38c3736aecd984999ca0a7 (patch)
tree 0f159212810c98aa07d26b6f7f28f4b8dbc9b302 /lib/PublicInbox/Search.pm
parent cf96412eb8f193ebd334fae340b2d91b6b7f2afe (diff)
download public-inbox-b18ecb7707e83cb8cb38c3736aecd984999ca0a7.tar.gz
This allows us to perform the expensive "dump_ibx" operations in
native C++ code using the Xapian C++ library.  This provides the
majority of the speedup with the -cindex --associate switch.

Eventually this may be expanded to cover all uses of Xapian
within the project to ensure we have access to Xapian APIs which
aren't available in XS|SWIG bindings; and also for
ease-of-installation on systems which don't provide
pre-packaged Perl Xapian bindings (e.g. OpenBSD 7.3) but
do provide Xapian development libraries.

Most of the C++ code is still C, as I'm not remotely as familiar
with C++ as I am with C.  I suspect many users and potential
hackers, coming from the git, Linux kernel, and glibc worlds, are
in the same boat.
Diffstat (limited to 'lib/PublicInbox/Search.pm')
-rw-r--r-- lib/PublicInbox/Search.pm 56
1 files changed, 49 insertions, 7 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index d5b0bceb..2e784646 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -66,6 +66,15 @@ our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor')
 # let's hope the ABI is stable
 our $ENQ_DESCENDING = 0;
 our $ENQ_ASCENDING = 1;
+our @MAIL_VMAP = (
+        [ YYYYMMDD, 'd:'],
+        [ DT, 'dt:' ],
+        # these are undocumented for WWW, but lei and IMAP use them
+        [ BYTES, 'z:' ],
+        [ TS, 'rt:' ],
+        [ UID, 'uid:' ]
+);
+our @MAIL_NRP;
 
 sub load_xapian () {
         return 1 if defined $Xap;
@@ -101,6 +110,7 @@ sub load_xapian () {
                 # or make indexlevel=medium as default
                 $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() |
                                 FLAG_WILDCARD();
+                @MAIL_NRP = map { $NVRP->new(@$_) } @MAIL_VMAP;
                 return 1;
         }
         undef;
@@ -490,14 +500,8 @@ sub qparse_new {
         my $qp = qp_init_common($self);
         my $cb = $qp->can('add_valuerangeprocessor') //
                 $qp->can('add_rangeprocessor'); # Xapian 1.5.0+
-        $cb->($qp, $NVRP->new(YYYYMMDD, 'd:'));
-        $cb->($qp, $NVRP->new(DT, 'dt:'));
-
-        # for IMAP, undocumented for WWW and may be split off go away
-        $cb->($qp, $NVRP->new(BYTES, 'z:'));
-        $cb->($qp, $NVRP->new(TS, 'rt:'));
-        $cb->($qp, $NVRP->new(UID, 'uid:'));
 
+        $cb->($qp, $_) for @MAIL_NRP;
         while (my ($name, $prefix) = each %bool_pfx_external) {
                 $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix);
         }
@@ -527,6 +531,40 @@ EOF
         $qp;
 }
 
+sub generate_cxx () { # generates snippet for xap_helper.h
+        my $ret = <<EOM;
+# line ${\__LINE__} "${\__FILE__}"
+static NRP *mail_nrp[${\scalar(@MAIL_VMAP)}];
+static void mail_nrp_init(void)
+{
+EOM
+        for (0..$#MAIL_VMAP) {
+                my $x = $MAIL_VMAP[$_];
+                $ret .= qq{\tmail_nrp[$_] = new NRP($x->[0], "$x->[1]");\n}
+        }
+$ret .= <<EOM;
+}
+
+# line ${\__LINE__} "${\__FILE__}"
+static void qp_init_mail_search(Xapian::QueryParser *qp)
+{
+        for (size_t i = 0; i < MY_ARRAY_SIZE(mail_nrp); i++)
+                qp->ADD_RP(mail_nrp[i]);
+EOM
+        for my $name (sort keys %bool_pfx_external) {
+                for (split(/ /, $bool_pfx_external{$name})) {
+                        $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n}
+                }
+        }
+        # TODO: altid support
+        for my $name (sort keys %prob_prefix) {
+                for (split(/ /, $prob_prefix{$name})) {
+                        $ret .= qq{\tqp->add_prefix("$name", "$_");\n}
+                }
+        }
+        $ret .= "}\n";
+}
+
 sub help {
         my ($self) = @_;
         $self->{qp} //= $self->qparse_new; # parse altids
@@ -585,4 +623,8 @@ sub all_terms {
         wantarray ? (sort keys %ret) : \%ret;
 }
 
+sub xh_args { # prep getopt args to feed to xap_helper.h socket
+        map { ('-d', $_) } shard_dirs($_[0]);
+}
+
 1;