diff options
author | Eric Wong <e@80x24.org> | 2023-08-24 01:22:33 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-08-24 07:47:51 +0000 |
commit | b18ecb7707e83cb8cb38c3736aecd984999ca0a7 (patch) | |
tree | 0f159212810c98aa07d26b6f7f28f4b8dbc9b302 /lib/PublicInbox/Search.pm | |
parent | cf96412eb8f193ebd334fae340b2d91b6b7f2afe (diff) | |
download | public-inbox-b18ecb7707e83cb8cb38c3736aecd984999ca0a7.tar.gz |
This allows us to perform the expensive "dump_ibx" operations in native C++ code using the Xapian C++ library. This provides the majority of the speedup with the -cindex --associate switch. Eventually this may be expanded to cover all uses of Xapian within the project to ensure we have access to Xapian APIs which aren't available in XS|SWIG bindings; and also for ease of installation on systems which don't provide pre-packaged Perl Xapian bindings (e.g. OpenBSD 7.3) but do provide Xapian development libraries. Most of the C++ code is still C, as I'm not remotely as familiar with C++ as I am with C. I suspect many users and potential hackers, coming from the git, Linux kernel, and glibc worlds, are in the same boat.
Diffstat (limited to 'lib/PublicInbox/Search.pm')
-rw-r--r-- | lib/PublicInbox/Search.pm | 56 |
1 files changed, 49 insertions, 7 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index d5b0bceb..2e784646 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -66,6 +66,15 @@ our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') # let's hope the ABI is stable our $ENQ_DESCENDING = 0; our $ENQ_ASCENDING = 1; +our @MAIL_VMAP = ( + [ YYYYMMDD, 'd:'], + [ DT, 'dt:' ], + # these are undocumented for WWW, but lei and IMAP use them + [ BYTES, 'z:' ], + [ TS, 'rt:' ], + [ UID, 'uid:' ] +); +our @MAIL_NRP; sub load_xapian () { return 1 if defined $Xap; @@ -101,6 +110,7 @@ sub load_xapian () { # or make indexlevel=medium as default $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() | FLAG_WILDCARD(); + @MAIL_NRP = map { $NVRP->new(@$_) } @MAIL_VMAP; return 1; } undef; @@ -490,14 +500,8 @@ sub qparse_new { my $qp = qp_init_common($self); my $cb = $qp->can('add_valuerangeprocessor') // $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ - $cb->($qp, $NVRP->new(YYYYMMDD, 'd:')); - $cb->($qp, $NVRP->new(DT, 'dt:')); - - # for IMAP, undocumented for WWW and may be split off go away - $cb->($qp, $NVRP->new(BYTES, 'z:')); - $cb->($qp, $NVRP->new(TS, 'rt:')); - $cb->($qp, $NVRP->new(UID, 'uid:')); + $cb->($qp, $_) for @MAIL_NRP; while (my ($name, $prefix) = each %bool_pfx_external) { $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix); } @@ -527,6 +531,40 @@ EOF $qp; } +sub generate_cxx () { # generates snippet for xap_helper.h + my $ret = <<EOM; +# line ${\__LINE__} "${\__FILE__}" +static NRP *mail_nrp[${\scalar(@MAIL_VMAP)}]; +static void mail_nrp_init(void) +{ +EOM + for (0..$#MAIL_VMAP) { + my $x = $MAIL_VMAP[$_]; + $ret .= qq{\tmail_nrp[$_] = new NRP($x->[0], "$x->[1]");\n} + } +$ret .= <<EOM; +} + +# line ${\__LINE__} "${\__FILE__}" +static void qp_init_mail_search(Xapian::QueryParser *qp) +{ + for (size_t i = 0; i < MY_ARRAY_SIZE(mail_nrp); i++) + qp->ADD_RP(mail_nrp[i]); +EOM + for my $name (sort keys %bool_pfx_external) { + 
for (split(/ /, $bool_pfx_external{$name})) { + $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n} + } + } + # TODO: altid support + for my $name (sort keys %prob_prefix) { + for (split(/ /, $prob_prefix{$name})) { + $ret .= qq{\tqp->add_prefix("$name", "$_");\n} + } + } + $ret .= "}\n"; +} + sub help { my ($self) = @_; $self->{qp} //= $self->qparse_new; # parse altids @@ -585,4 +623,8 @@ sub all_terms { wantarray ? (sort keys %ret) : \%ret; } +sub xh_args { # prep getopt args to feed to xap_helper.h socket + map { ('-d', $_) } shard_dirs($_[0]); +} + 1; |