about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2024-05-05 23:35:10 +0000
committer: Eric Wong <e@80x24.org> 2024-05-06 06:06:09 +0000
commit: 34709e6f0153bd92a117f542a7bfb76e7d289d2e (patch)
tree: 69fdfb6579344b941e7018373ca55623519c3fe1 /lib
parent: 7f3b57f4c1cbbb7ddfa41dde6d25276ee96d3fd4 (diff)
download: public-inbox-34709e6f0153bd92a117f542a7bfb76e7d289d2e.tar.gz
External Xapian helper processes need to support non-standard
QueryParser prefixes.  The only way to do this is to specify
these prefixes in every `mset' request since we have no idea
if the XH worker servicing the request has initialized the
extra prefixes yet.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Search.pm 20
-rw-r--r-- lib/PublicInbox/XapHelper.pm 14
-rw-r--r-- lib/PublicInbox/xap_helper.h 35
3 files changed, 64 insertions, 5 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fbdb48a3..e5c5d6ab 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -92,6 +92,7 @@ our @XH_SPEC = (
         'K=i', # timeout kill after i seconds
         'O=s', # eidx_key
         'T=i', # threadid
+        'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX"
 );
 
 sub load_xapian () {
@@ -435,8 +436,8 @@ sub xhc_start_maybe (@) {
         $xhc;
 }
 
-sub xh_opt ($) {
-        my ($opt) = @_;
+sub xh_opt ($$) {
+        my ($self, $opt) = @_;
         my $lim = $opt->{limit} || 50;
         my @ret;
         push @ret, '-o', $opt->{offset} if $opt->{offset};
@@ -458,7 +459,16 @@ sub xh_opt ($) {
         push @ret, '-t' if $opt->{threads};
         push @ret, '-T', $opt->{threadid} if defined $opt->{threadid};
         push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key};
-        @ret;
+        my $apfx = $self->{-alt_pfx} //= do {
+                my @tmp;
+                for (grep /\Aserial:/, @{$self->{altid} // []}) {
+                        my (undef, $pfx) = split /:/, $_;
+                        push @tmp, '-Q', "$pfx=X\U$pfx";
+                }
+                # TODO: arbitrary header indexing goes here
+                \@tmp;
+        };
+        (@ret, @$apfx);
 }
 
 # returns a true value if actually handled asynchronously,
@@ -467,7 +477,7 @@ sub async_mset {
         my ($self, $qry_str, $opt, $cb, @args) = @_;
         if ($XHC) { # unconditionally retrieving pct + rank for now
                 xdb($self); # populate {nshards}
-                my @margs = ($self->xh_args, xh_opt($opt));
+                my @margs = ($self->xh_args, xh_opt($self, $opt));
                 my $ret = eval {
                         my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str);
                         PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
@@ -630,7 +640,7 @@ EOM
                         $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n}
                 }
         }
-        # TODO: altid support
+        # altid support is handled in xh_opt and srch_init_extra in XH
         for my $name (sort keys %prob_prefix) {
                 for (split(/ /, $prob_prefix{$name})) {
                         $ret .= qq{\tqp->add_prefix("$name", "$_");\n}
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 2e20660e..099bc4fe 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -172,6 +172,18 @@ sub cmd_mset { # to be used by WWW + IMAP
         }
 }
 
+sub srch_init_extra ($) { # register every -Q "$user_prefix[:=]$XPREFIX" entry on the query parser
+        my ($req) = @_; # $req: request hash; reads {Q} and {srch}, writes {srch}->{qp_extra_done}
+        my $qp = $req->{srch}->{qp}; # query parser object stored on the srch entry
+        for (@{$req->{Q}}) { # each entry is aliased to $_
+                my ($upfx, $m, $xpfx) = split /([:=])/; # capture group keeps the separator as $m; later fields are dropped
+                $xpfx // die "E: bad -Q $_"; # undefined $xpfx means the entry had no usable separator
+                $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix'; # '=' selects the boolean form, ':' the plain form
+                $qp->$m($upfx, $xpfx); # call the method whose name is held in $m
+        }
+        $req->{srch}->{qp_extra_done} = 1; # dispatch tests this flag and skips this sub once it is set
+}
+
 sub dispatch {
         my ($req, $cmd, @argv) = @_;
         my $fn = $req->can("cmd_$cmd") or return;
@@ -195,6 +207,8 @@ sub dispatch {
                 $new->{qp} = $new->qparse_new;
                 $new;
         };
+        $req->{Q} && !$req->{srch}->{qp_extra_done} and
+                srch_init_extra $req;
         my $timeo = $req->{K};
         alarm($timeo) if $timeo;
         $fn->($req, @argv);
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 3df3ce91..bdc1c5b1 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -114,6 +114,7 @@ enum exc_iter {
 struct srch {
         int paths_len; // int for comparisons
         unsigned qp_flags;
+        bool qp_extra_done;
         Xapian::Database *db;
         Xapian::QueryParser *qp;
         char paths[]; // $shard_path0\0$shard_path1\0...
@@ -126,6 +127,7 @@ typedef bool (*cmd)(struct req *);
 struct req { // argv and pfxv point into global rbuf
         char *argv[MY_ARG_MAX];
         char *pfxv[MY_ARG_MAX]; // -A <prefix>
+        char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX>
         size_t *lenv; // -A <prefix>LENGTH
         struct srch *srch;
         char *Pgit_dir;
@@ -139,6 +141,7 @@ struct req { // argv and pfxv point into global rbuf
         long sort_col; // value column, negative means BoolWeight
         int argc;
         int pfxc;
+        int qpfxc;
         FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional)
         bool has_input; // fp[0] is bidirectional
         bool collapse_threads;
@@ -584,6 +587,31 @@ static bool srch_init(struct req *req)
         return true;
 }
 
+// setup query parser for altid and arbitrary headers from the -Q args collected in req->qpfxv
+static void srch_init_extra(struct req *req) // exits via errx() on an entry lacking both '=' and ':'
+{
+        const char *XPFX; // points at the internal prefix (text after the separator)
+        for (int i = 0; i < req->qpfxc; i++) {
+                size_t len = strlen(req->qpfxv[i]);
+                char *c = (char *)memchr(req->qpfxv[i], '=', len); // '=' is tried first, so it wins over any ':'
+
+                if (c) { // it's boolean "gmane=XGMANE"
+                        XPFX = c + 1;
+                        *c = 0; // split in place: qpfxv[i] now ends at the separator (user prefix only)
+                        req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX);
+                        continue;
+                }
+                // maybe it's a non-boolean prefix "blob:XBLOBID"
+                c = (char *)memchr(req->qpfxv[i], ':', len);
+                if (!c) // neither separator present: report the bad argument and exit
+                        errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]);
+                XPFX = c + 1;
+                *c = 0; // same in-place split as the boolean case
+                req->srch->qp->add_prefix(req->qpfxv[i], XPFX);
+        }
+        req->srch->qp_extra_done = true; // dispatch() checks this flag before calling here again
+}
+
 static void free_srch(void *p) // tdestroy
 {
         struct srch *srch = (struct srch *)p;
@@ -665,12 +693,17 @@ static void dispatch(struct req *req)
                         if (*end || req->threadid == ULLONG_MAX)
                                 ABORT("-T %s", optarg);
                         break;
+                case 'Q':
+                        req->qpfxv[req->qpfxc++] = optarg;
+                        if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q");
+                        break;
                 default: ABORT("bad switch `-%c'", c);
                 }
         }
         ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch
         kbuf.srch->db = NULL;
         kbuf.srch->qp = NULL;
+        kbuf.srch->qp_extra_done = false;
         kbuf.srch->paths_len = size - offsetof(struct srch, paths);
         if (kbuf.srch->paths_len <= 0)
                 ABORT("no -d args");
@@ -687,6 +720,8 @@ static void dispatch(struct req *req)
                 free_srch(kbuf.srch);
                 goto cmd_err; // srch_init already warned
         }
+        if (req->qpfxc && !req->srch->qp_extra_done)
+                srch_init_extra(req);
         if (req->timeout_sec)
                 alarm(req->timeout_sec > UINT_MAX ?
                         UINT_MAX : (unsigned)req->timeout_sec);