From b18ecb7707e83cb8cb38c3736aecd984999ca0a7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Aug 2023 01:22:33 +0000 Subject: introduce optional C++ xap_helper This allows us to perform the expensive "dump_ibx" operations in native C++ code using the Xapian C++ library. This provides the majority of the speedup with the -cindex --associate switch. Eventually this may be expanded to cover all uses of Xapian within the project to ensure we have access to Xapian APIs which aren't available in XS|SWIG bindings; and also for ease-of-installation on systems which don't provide pre-packaged Perl Xapian bindings (e.g. OpenBSD 7.3) but do provide Xapian development libraries. Most of the C++ code is still C, as I'm not remotely familiar with C++ compared to C. I suspect many users and potential hackers being from git, Linux kernel, and glibc world are in the same boat. --- lib/PublicInbox/Search.pm | 56 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) (limited to 'lib/PublicInbox/Search.pm') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index d5b0bceb..2e784646 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -66,6 +66,15 @@ our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') # let's hope the ABI is stable our $ENQ_DESCENDING = 0; our $ENQ_ASCENDING = 1; +our @MAIL_VMAP = ( + [ YYYYMMDD, 'd:'], + [ DT, 'dt:' ], + # these are undocumented for WWW, but lei and IMAP use them + [ BYTES, 'z:' ], + [ TS, 'rt:' ], + [ UID, 'uid:' ] +); +our @MAIL_NRP; sub load_xapian () { return 1 if defined $Xap; @@ -101,6 +110,7 @@ sub load_xapian () { # or make indexlevel=medium as default $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() | FLAG_WILDCARD(); + @MAIL_NRP = map { $NVRP->new(@$_) } @MAIL_VMAP; return 1; } undef; @@ -490,14 +500,8 @@ sub qparse_new { my $qp = qp_init_common($self); my $cb = $qp->can('add_valuerangeprocessor') // $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ - $cb->($qp, $NVRP->new(YYYYMMDD, 'd:')); - $cb->($qp, $NVRP->new(DT, 'dt:')); - - # for IMAP, undocumented for WWW and may be split off go away - $cb->($qp, $NVRP->new(BYTES, 'z:')); - $cb->($qp, $NVRP->new(TS, 'rt:')); - $cb->($qp, $NVRP->new(UID, 'uid:')); + $cb->($qp, $_) for @MAIL_NRP; while (my ($name, $prefix) = each %bool_pfx_external) { $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix); } @@ -527,6 +531,40 @@ EOF $qp; } +sub generate_cxx () { # generates snippet for xap_helper.h + my $ret = <[0], "$x->[1]");\n} + } +$ret .= <ADD_RP(mail_nrp[i]); +EOM + for my $name (sort keys %bool_pfx_external) { + for (split(/ /, $bool_pfx_external{$name})) { + $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n} + } + } + # TODO: altid support + for my $name (sort keys %prob_prefix) { + for (split(/ /, $prob_prefix{$name})) { + $ret .= qq{\tqp->add_prefix("$name", "$_");\n} + } + } + $ret .= "}\n"; +} + sub help { my ($self) = @_; $self->{qp} //= $self->qparse_new; # parse altids @@ -585,4 +623,8 @@ sub all_terms { wantarray ? (sort keys %ret) : \%ret; } +sub xh_args { # prep getopt args to feed to xap_helper.h socket + map { ('-d', $_) } shard_dirs($_[0]); +} + 1; -- cgit v1.2.3-24-ge0c7