From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 01/10] lei: move external vivification to xsearch
Date: Sat, 23 Jan 2021 10:27:46 +0000 [thread overview]
Message-ID: <20210123102755.425-2-e@80x24.org> (raw)
In-Reply-To: <20210123102755.425-1-e@80x24.org>
This seems like a better place to put it given upcoming
URI support, which starts in this commit.
---
lib/PublicInbox/LeiQuery.pm | 27 +++++------------
lib/PublicInbox/LeiXSearch.pm | 57 ++++++++++++++++++++++++-----------
t/lei_xsearch.t | 5 ++-
3 files changed, 50 insertions(+), 39 deletions(-)
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 7d634b5e..eebf217b 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -7,19 +7,6 @@ use strict;
use v5.10.1;
use PublicInbox::DS qw(dwaitpid);
-sub _vivify_external { # _externals_each callback
- my ($src, $dir) = @_;
- if (-f "$dir/ei.lock") {
- require PublicInbox::ExtSearch;
- push @$src, PublicInbox::ExtSearch->new($dir);
- } elsif (-f "$dir/inbox.lock" || -d "$dir/public-inbox") { # v2, v1
- require PublicInbox::Inbox;
- push @$src, bless { inboxdir => $dir }, 'PublicInbox::Inbox';
- } else {
- warn "W: ignoring $dir, unable to determine type\n";
- }
-}
-
# the main "lei q SEARCH_TERMS" method
sub lei_q {
my ($self, @argv) = @_;
@@ -27,19 +14,19 @@ sub lei_q {
require PublicInbox::LeiOverview;
PublicInbox::Config->json; # preload before forking
my $opt = $self->{opt};
- my @srcs; # any number of LeiXSearch || LeiSearch || Inbox
+ my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
+ # any number of LeiXSearch || LeiSearch || Inbox
if ($opt->{'local'} //= 1) { # --local is enabled by default
my $sto = $self->_lei_store(1);
- push @srcs, $sto->search;
+ $lxs->prepare_external($sto->search);
}
- my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new;
# --external is enabled by default, but allow --no-external
if ($opt->{external} //= 1) {
- $self->_externals_each(\&_vivify_external, \@srcs);
+ my $cb = $lxs->can('prepare_external');
+ $self->_externals_each($cb, $lxs);
}
- my $xj = $opt->{jobs} // (scalar(@srcs) > 3 ? 3 : scalar(@srcs));
- $xj = 1 if !$opt->{thread};
+ my $xj = $opt->{thread} ? $lxs->locals : ($lxs->remotes + 1);
my $ovv = PublicInbox::LeiOverview->new($self) or return;
$self->atfork_prepare_wq($lxs);
$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
@@ -76,7 +63,7 @@ sub lei_q {
$mset_opt{relevance} //= -2 if $opt->{thread};
$self->{mset_opt} = \%mset_opt;
$ovv->ovv_begin($self);
- $lxs->do_query($self, \@srcs);
+ $lxs->do_query($self);
}
1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 987a9896..10c25246 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -26,10 +26,6 @@ sub new {
sub attach_external {
my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
-
- if (!$ibxish->can('over') || !$ibxish->over) {
- return push(@{$self->{remotes}}, $ibxish)
- }
my $desc = $ibxish->{inboxdir} // $ibxish->{topdir};
my $srch = $ibxish->search or
return warn("$desc not indexed for Xapian\n");
@@ -59,10 +55,9 @@ sub attach_external {
}
# returns a list of local inboxes (or count in scalar context)
-sub locals {
- my %uniq = map {; "$_" => $_ } @{$_[0]->{shard2ibx} // []};
- values %uniq;
-}
+sub locals { @{$_[0]->{locals} // []} }
+
+sub remotes { @{$_[0]->{remotes} // []} }
# called by PublicInbox::Search::xdb
sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
@@ -148,14 +143,16 @@ sub query_thread_mset { # for --thread
}
sub query_mset { # non-parallel for non-"--thread" users
- my ($self, $lei, $srcs) = @_;
+ my ($self, $lei) = @_;
local $0 = "$0 query_mset";
my $startq = delete $self->{5};
my %sig = $lei->atfork_child_wq($self);
local @SIG{keys %sig} = values %sig;
my $mo = { %{$lei->{mset_opt}} };
my $mset;
- $self->attach_external($_) for @$srcs;
+ for my $loc (locals($self)) {
+ attach_external($self, $loc);
+ }
my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
$dedupe->prepare_dedupe;
@@ -172,6 +169,10 @@ sub query_mset { # non-parallel for non-"--thread" users
$lei->{ovv}->ovv_atexit_child($lei);
}
+sub query_remote_mboxrd {
+ my ($self, $lei, $uri) = @_;
+}
+
sub git {
my ($self) = @_;
my (%seen, @dirs);
@@ -221,18 +222,17 @@ sub do_post_augment {
}
sub start_query { # always runs in main (lei-daemon) process
- my ($self, $io, $lei, $srcs) = @_;
- my $remotes = $self->{remotes} // [];
+ my ($self, $io, $lei) = @_;
if ($lei->{opt}->{thread}) {
- for my $ibxish (@$srcs) {
+ for my $ibxish (locals($self)) {
$self->wq_do('query_thread_mset', $io, $lei, $ibxish);
}
} else {
- $self->wq_do('query_mset', $io, $lei, $srcs);
+ $self->wq_do('query_mset', $io, $lei);
}
# TODO
- for my $rmt (@$remotes) {
- $self->wq_do('query_thread_mbox', $io, $lei, $rmt);
+ for my $uri (remotes($self)) {
+ $self->wq_do('query_remote_mboxrd', $io, $lei, $uri);
}
@$io = ();
}
@@ -259,7 +259,7 @@ sub sigpipe_handler { # handles SIGPIPE from l2m/lxs workers
}
sub do_query {
- my ($self, $lei_orig, $srcs) = @_;
+ my ($self, $lei_orig) = @_;
my ($lei, @io) = $lei_orig->atfork_parent_wq($self);
$io[0] = undef;
pipe(my $done, $io[0]) or die "pipe $!";
@@ -286,7 +286,7 @@ sub do_query {
$io[5] = $startq;
$io[1] = $zpipe->[1] if $zpipe;
}
- start_query($self, \@io, $lei, $srcs);
+ start_query($self, \@io, $lei);
$self->wq_close(1);
unless ($in_loop) {
# for the $lei->atfork_child_wq PIPE handler:
@@ -302,4 +302,25 @@ sub ipc_atfork_prepare {
$self->SUPER::ipc_atfork_prepare; # PublicInbox::IPC
}
+sub prepare_external {
+ my ($self, $loc, $boost) = @_; # n.b. already ordered by boost
+ if (ref $loc) { # already a URI, or PublicInbox::Inbox-like object
+ return push(@{$self->{remotes}}, $loc) if $loc->can('scheme');
+ } elsif ($loc =~ m!\Ahttps?://!) {
+ require URI;
+ return push(@{$self->{remotes}}, URI->new($loc));
+ } elsif (-f "$loc/ei.lock") {
+ require PublicInbox::ExtSearch;
+ $loc = PublicInbox::ExtSearch->new($loc);
+ } elsif (-f "$loc/inbox.lock" || -d "$loc/public-inbox") {
+ require PublicInbox::Inbox; # v2, v1
+ $loc = bless { inboxdir => $loc }, 'PublicInbox::Inbox';
+ } else {
+ warn "W: ignoring $loc, unable to determine type\n";
+ return;
+ }
+ push @{$self->{locals}}, $loc;
+}
+
+
1;
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
index 8b03c1f2..f745ea3e 100644
--- a/t/lei_xsearch.t
+++ b/t/lei_xsearch.t
@@ -49,7 +49,10 @@ $eidx->eidx_sync({fsync => 0});
my $es = PublicInbox::ExtSearch->new("$home/eidx");
my $lxs = PublicInbox::LeiXSearch->new;
for my $ibxish (shuffle($es, @ibx)) {
- $lxs->attach_external($ibxish);
+ $lxs->prepare_external($ibxish);
+}
+for my $loc ($lxs->locals) {
+ $lxs->attach_external($loc);
}
my $nr = $lxs->xdb->get_doccount;
my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr });
next prev parent reply other threads:[~2021-01-23 10:27 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
2021-01-23 10:27 ` Eric Wong [this message]
2021-01-23 10:27 ` [PATCH 02/10] lei: support remote externals Eric Wong
2021-01-24 6:01 ` Kyle Meyer
2021-01-24 12:02 ` Eric Wong
2021-01-24 12:12 ` Eric Wong
2021-01-24 22:11 ` Kyle Meyer
2021-01-25 18:37 ` Eric Wong
2021-01-23 10:27 ` [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC Eric Wong
2021-01-23 10:27 ` [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox Eric Wong
2021-01-23 10:27 ` [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs Eric Wong
2021-01-23 10:27 ` [PATCH 06/10] lei completion: handle URLs with port numbers Eric Wong
2021-01-23 10:27 ` [PATCH 07/10] lei forget-external: just show the location Eric Wong
2021-01-23 10:27 ` [PATCH 08/10] lei q: support a bunch of curl(1) options Eric Wong
2021-01-23 10:27 ` [PATCH 09/10] lei forget-external: don't show redundant "not found" Eric Wong
2021-01-23 10:27 ` [PATCH 10/10] lei add-external: don't allow non-existent directories Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210123102755.425-2-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).