diff options
author | Eric Wong <e@80x24.org> | 2024-05-05 23:35:10 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2024-05-06 06:06:09 +0000 |
commit | 34709e6f0153bd92a117f542a7bfb76e7d289d2e (patch) | |
tree | 69fdfb6579344b941e7018373ca55623519c3fe1 /lib | |
parent | 7f3b57f4c1cbbb7ddfa41dde6d25276ee96d3fd4 (diff) | |
download | public-inbox-34709e6f0153bd92a117f542a7bfb76e7d289d2e.tar.gz |
External Xapian helper (XH) processes need to support non-standard QueryParser prefixes. The only way to do this is to specify these prefixes in every `mset' request, since we have no idea whether the XH worker servicing the request has initialized the extra prefixes yet.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Search.pm | 20 | ||||
-rw-r--r-- | lib/PublicInbox/XapHelper.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/xap_helper.h | 35 |
3 files changed, 64 insertions, 5 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index fbdb48a3..e5c5d6ab 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -92,6 +92,7 @@ our @XH_SPEC = ( 'K=i', # timeout kill after i seconds 'O=s', # eidx_key 'T=i', # threadid + 'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX" ); sub load_xapian () { @@ -435,8 +436,8 @@ sub xhc_start_maybe (@) { $xhc; } -sub xh_opt ($) { - my ($opt) = @_; +sub xh_opt ($$) { + my ($self, $opt) = @_; my $lim = $opt->{limit} || 50; my @ret; push @ret, '-o', $opt->{offset} if $opt->{offset}; @@ -458,7 +459,16 @@ sub xh_opt ($) { push @ret, '-t' if $opt->{threads}; push @ret, '-T', $opt->{threadid} if defined $opt->{threadid}; push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key}; - @ret; + my $apfx = $self->{-alt_pfx} //= do { + my @tmp; + for (grep /\Aserial:/, @{$self->{altid} // []}) { + my (undef, $pfx) = split /:/, $_; + push @tmp, '-Q', "$pfx=X\U$pfx"; + } + # TODO: arbitrary header indexing goes here + \@tmp; + }; + (@ret, @$apfx); } # returns a true value if actually handled asynchronously, @@ -467,7 +477,7 @@ sub async_mset { my ($self, $qry_str, $opt, $cb, @args) = @_; if ($XHC) { # unconditionally retrieving pct + rank for now xdb($self); # populate {nshards} - my @margs = ($self->xh_args, xh_opt($opt)); + my @margs = ($self->xh_args, xh_opt($self, $opt)); my $ret = eval { my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str); PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args); @@ -630,7 +640,7 @@ EOM $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n} } } - # TODO: altid support + # altid support is handled in xh_opt and srch_init_extra in XH for my $name (sort keys %prob_prefix) { for (split(/ /, $prob_prefix{$name})) { $ret .= qq{\tqp->add_prefix("$name", "$_");\n} diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index 2e20660e..099bc4fe 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -172,6 +172,18 @@ 
sub cmd_mset { # to be used by WWW + IMAP } } +sub srch_init_extra ($) { + my ($req) = @_; + my $qp = $req->{srch}->{qp}; + for (@{$req->{Q}}) { + my ($upfx, $m, $xpfx) = split /([:=])/; + $xpfx // die "E: bad -Q $_"; + $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix'; + $qp->$m($upfx, $xpfx); + } + $req->{srch}->{qp_extra_done} = 1; +} + sub dispatch { my ($req, $cmd, @argv) = @_; my $fn = $req->can("cmd_$cmd") or return; @@ -195,6 +207,8 @@ sub dispatch { $new->{qp} = $new->qparse_new; $new; }; + $req->{Q} && !$req->{srch}->{qp_extra_done} and + srch_init_extra $req; my $timeo = $req->{K}; alarm($timeo) if $timeo; $fn->($req, @argv); diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h index 3df3ce91..bdc1c5b1 100644 --- a/lib/PublicInbox/xap_helper.h +++ b/lib/PublicInbox/xap_helper.h @@ -114,6 +114,7 @@ enum exc_iter { struct srch { int paths_len; // int for comparisons unsigned qp_flags; + bool qp_extra_done; Xapian::Database *db; Xapian::QueryParser *qp; char paths[]; // $shard_path0\0$shard_path1\0... 
@@ -126,6 +127,7 @@ typedef bool (*cmd)(struct req *); struct req { // argv and pfxv point into global rbuf char *argv[MY_ARG_MAX]; char *pfxv[MY_ARG_MAX]; // -A <prefix> + char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX> size_t *lenv; // -A <prefix>LENGTH struct srch *srch; char *Pgit_dir; @@ -139,6 +141,7 @@ struct req { // argv and pfxv point into global rbuf long sort_col; // value column, negative means BoolWeight int argc; int pfxc; + int qpfxc; FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional) bool has_input; // fp[0] is bidirectional bool collapse_threads; @@ -584,6 +587,31 @@ static bool srch_init(struct req *req) return true; } +// setup query parser for altid and arbitrary headers +static void srch_init_extra(struct req *req) +{ + const char *XPFX; + for (int i = 0; i < req->qpfxc; i++) { + size_t len = strlen(req->qpfxv[i]); + char *c = (char *)memchr(req->qpfxv[i], '=', len); + + if (c) { // it's boolean "gmane=XGMANE" + XPFX = c + 1; + *c = 0; + req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX); + continue; + } + // maybe it's a non-boolean prefix "blob:XBLOBID" + c = (char *)memchr(req->qpfxv[i], ':', len); + if (!c) + errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]); + XPFX = c + 1; + *c = 0; + req->srch->qp->add_prefix(req->qpfxv[i], XPFX); + } + req->srch->qp_extra_done = true; +} + static void free_srch(void *p) // tdestroy { struct srch *srch = (struct srch *)p; @@ -665,12 +693,17 @@ static void dispatch(struct req *req) if (*end || req->threadid == ULLONG_MAX) ABORT("-T %s", optarg); break; + case 'Q': + req->qpfxv[req->qpfxc++] = optarg; + if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q"); + break; default: ABORT("bad switch `-%c'", c); } } ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch kbuf.srch->db = NULL; kbuf.srch->qp = NULL; + kbuf.srch->qp_extra_done = false; kbuf.srch->paths_len = size - offsetof(struct srch, paths); if (kbuf.srch->paths_len <= 0) ABORT("no -d args"); @@ -687,6 
+720,8 @@ static void dispatch(struct req *req) free_srch(kbuf.srch); goto cmd_err; // srch_init already warned } + if (req->qpfxc && !req->srch->qp_extra_done) + srch_init_extra(req); if (req->timeout_sec) alarm(req->timeout_sec > UINT_MAX ? UINT_MAX : (unsigned)req->timeout_sec); |