From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 26/26] lei: extinbox: start implementing in config file
Date: Fri, 18 Dec 2020 12:09:50 +0000 [thread overview]
Message-ID: <20201218120950.23272-27-e@80x24.org> (raw)
In-Reply-To: <20201218120950.23272-1-e@80x24.org>
External inboxes (extinbox entries) are only recorded in the
config file for now.  They still need to be indexed by MiscIdx,
but MiscIdx needs more work to support faster config loading
when dealing with ~100K data sources.
---
lib/PublicInbox/LEI.pm | 19 ++++-----
lib/PublicInbox/LeiExtinbox.pm | 52 ++++++++++++++++++++++++
t/lei.t | 72 ++++++++++++++++++++++++++++++++--
3 files changed, 130 insertions(+), 13 deletions(-)
create mode 100644 lib/PublicInbox/LeiExtinbox.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 97c5d91b..b254e2c5 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -8,7 +8,7 @@
package PublicInbox::LEI;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::DS);
+use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox);
use Getopt::Long ();
use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
use Errno qw(EAGAIN ECONNREFUSED ENOENT);
@@ -79,12 +79,12 @@ our %CMD = ( # sorted in order of importance/use:
'add-extinbox' => [ 'URL_OR_PATHNAME',
'add/set priority of a publicinbox|extindex for extra matches',
- qw(prio=i) ],
+ qw(boost=i quiet|q) ],
'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations',
- qw(format|f=s z local remote) ],
+ qw(format|f=s z|0 local remote quiet|q) ],
'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}',
'exclude further results from a publicinbox|extindex',
- qw(prune) ],
+ qw(prune quiet|q) ],
'ls-query' => [ '[FILTER...]', 'list saved search queries',
qw(name-only format|f=s z) ],
@@ -107,7 +107,7 @@ our %CMD = ( # sorted in order of importance/use:
# code repos are used for `show' to solve blobs from patch mails
'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
- qw(prio=i) ],
+ qw(boost=i) ],
'ls-coderepo' => [ '[FILTER_TERMS...]',
'list known code repos', qw(format|f=s z) ],
'forget-coderepo' => [ 'PATHNAME',
@@ -197,7 +197,7 @@ my %OPTDESC = (
'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid',
"order of results `--output'-dependent"],
-'prio=i' => 'priority of query source',
+'boost=i' => 'increase/decrease priority of results (default: 0)',
'local' => 'limit operations to the local filesystem',
'local!' => 'exclude results from the local filesystem',
@@ -217,8 +217,7 @@ my %OPTDESC = (
'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
'jobs:i' => 'set parallelism level',
-# xargs, env, use "-0", git(1) uses "-z". Should we support z|0 everywhere?
-'z' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere
'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
# note: no "--ignore-environment" / "-i" support like env(1) since that
@@ -455,7 +454,9 @@ sub _lei_store ($;$) {
$cfg->{-lei_store} //= do {
require PublicInbox::LeiStore;
PublicInbox::SearchIdx::load_xapian_writable();
- defined(my $dir = $cfg->{'leistore.dir'}) or return;
+ my $dir = $cfg->{'leistore.dir'};
+ $dir //= _store_path($self->{env}) if $creat;
+ return unless $dir;
PublicInbox::LeiStore->new($dir, { creat => $creat });
};
}
diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm
new file mode 100644
index 00000000..2f52b115
--- /dev/null
+++ b/lib/PublicInbox/LeiExtinbox.pm
@@ -0,0 +1,52 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# *-extinbox commands of lei
+package PublicInbox::LeiExtinbox;
+use strict;
+use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox);
+
+# "lei ls-extinbox": print every [extinbox "LOCATION"] section found in
+# the lei config to the client's stdout ($self->{1}), one record per
+# location in the form: LOCATION <OFS> "boost=" N <ORS>
+# With --z/-0, fields are separated by NUL and records end with two NULs;
+# otherwise a space separates fields and a newline ends each record.
+# Records are ordered by descending boost, ties broken alphabetically.
+# @argv ([FILTER...]) is accepted but no filtering is applied here.
+sub lei_ls_extinbox {
+	my ($self, @argv) = @_;
+	# The store handle is not needed until locations are indexed via
+	# MiscIdx (see TODO below); the non-creating call is kept so the
+	# existing load behavior stays the same.
+	$self->_lei_store(0);
+	my $cfg = $self->_lei_cfg(0);
+	my $out = $self->{1};
+	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+	# tolerate a missing config or a config without any sections
+	my $sections = ($cfg // {})->{-section_order} // [];
+	my %boost;
+	for my $sec (grep(/\Aextinbox\./, @$sections)) {
+		my $loc = substr($sec, length('extinbox.'));
+		# a section lacking an explicit boost uses the default of 0
+		$boost{$loc} = $cfg->{"$sec.boost"} // 0;
+	}
+	# highest boost first, alphabetic tie break; the explicit tie-break
+	# makes the order independent of sort stability
+	for my $loc (sort { $boost{$b} <=> $boost{$a} || $a cmp $b } keys %boost) {
+		# TODO: use miscidx and show docid so forget/set is easier
+		print $out $loc, $OFS, 'boost=', $boost{$loc}, $ORS;
+	}
+}
+
+# "lei add-extinbox URL_OR_PATHNAME": record an external inbox in the lei
+# config as extinbox.<location>.boost = N, where N comes from --boost
+# and defaults to 0.  Anything not starting with http:// or https:// is
+# treated as a filesystem path and canonicalized first.  Nothing is
+# written when the stored boost already equals the requested one.
+sub lei_add_extinbox {
+	my ($self, $url_or_dir) = @_;
+	my $cfg = $self->_lei_cfg(1);
+	if ($url_or_dir !~ m!\Ahttps?://!) {
+		# this module does not load File::Spec at the top, so load
+		# it here instead of relying on another module having done so
+		require File::Spec;
+		$url_or_dir = File::Spec->canonpath($url_or_dir);
+	}
+	my $new_boost = $self->{opt}->{boost} // 0;
+	my $key = "extinbox.$url_or_dir.boost";
+	my $cur_boost = $cfg->{$key};
+	return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+	$self->lei_config($key, $new_boost);
+	# open the store with creat=1 after the config write
+	my $stor = $self->_lei_store(1);
+	# TODO: add to MiscIdx
+	$stor->done;
+}
+
+sub lei_forget_extinbox {
+ # TODO: not implemented yet; this exported command is currently an empty stub
+}
+
+1;
diff --git a/t/lei.t b/t/lei.t
index 30f9d2b6..a95a0efc 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -7,17 +7,18 @@ use Test::More;
use PublicInbox::TestCommon;
use PublicInbox::Config;
use File::Path qw(rmtree);
+require_git 2.6;
require_mods(qw(json DBD::SQLite Search::Xapian));
my $LEI = 'lei';
my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
my $lei = sub {
- my ($cmd, $env, $opt) = @_;
+ my ($cmd, $env, $xopt) = @_;
$out = $err = '';
if (!ref($cmd)) {
- ($env, $opt) = grep { (!defined) || ref } @_;
- $cmd = [ grep { defined } @_ ];
+ ($env, $xopt) = grep { (!defined) || ref } @_;
+ $cmd = [ grep { defined && !ref } @_ ];
}
- run_script([$LEI, @$cmd], $env, $opt);
+ run_script([$LEI, @$cmd], $env, $xopt // $opt);
};
my ($home, $for_destroy) = tmpdir();
@@ -29,6 +30,8 @@ local $ENV{FOO} = 'BAR';
mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
my $home_trash = [ "$home/.local", "$home/.config" ];
my $cleanup = sub { rmtree([@$home_trash, @_]) };
+my $config_file = "$home/.config/lei/config";
+my $store_dir = "$home/.local/share/lei";
my $test_help = sub {
ok(!$lei->([], undef, $opt), 'no args fails');
@@ -118,10 +121,71 @@ my $test_config = sub {
ok(!-f "$home/config/f", 'no file created');
};
+# Creates one v1 and one v2 public-inbox under $home and imports the
+# test messages into each.  Runs at most once per $home value.
+my $setup_publicinboxes = sub {
+	state $done = '';
+	return if $done eq $home;
+	use PublicInbox::InboxWritable;
+	foreach my $ver (1, 2) {
+		my @init = (qw(-init -Lmedium), "-V$ver", "t$ver",
+			'--newsgroup', "t.$ver",
+			"$home/t$ver", "http://example.com/t$ver",
+			"t$ver\@example.com");
+		run_script(\@init) or BAIL_OUT "init v$ver";
+	}
+	my $nr_added = 0;
+	# import every test message into a single inbox
+	my $fill_inbox = sub {
+		my ($ibx) = @_;
+		my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+		my $ver = $ibx->version;
+		# t/psgi_v2-old.eml is left out: dup mid
+		my @eml = grep { $_ ne 't/psgi_v2-old.eml' } glob('t/*.eml');
+		push(@eml, 't/data/0001.patch') if $ver == 2;
+		foreach my $path (@eml) {
+			$im->add(eml_load($path)) or BAIL_OUT "v$ver add $path";
+			++$nr_added;
+		}
+		$im->done;
+		# only the v1 inbox gets an explicit -index run
+		if ($ver == 1) {
+			run_script(['-index', $ibx->{inboxdir}]) or
+				BAIL_OUT 'index v1';
+		}
+	};
+	PublicInbox::Config->new->each_inbox($fill_inbox);
+	$done = $home;
+	$nr_added or BAIL_OUT 'no imports';
+};
+
+# Exercises ls-extinbox / add-extinbox against the inboxes created by
+# $setup_publicinboxes, starting from a clean lei config and store.
+my $test_extinbox = sub {
+	$setup_publicinboxes->();
+	$cleanup->();
+
+	# listing before anything was added: silent and creates nothing
+	$lei->('ls-extinbox');
+	is($out.$err, '', 'ls-extinbox no output, yet');
+	ok(!(-e $config_file || -e $store_dir),
+		'nothing created by ls-extinbox');
+
+	my $pi_cfg = PublicInbox::Config->new;
+	my $add_one = sub {
+		my ($ibx) = @_;
+		ok($lei->('add-extinbox', '-q', $ibx->{inboxdir}),
+			'added extinbox');
+		is($out.$err, '', 'no output');
+	};
+	$pi_cfg->each_inbox($add_one);
+	ok(-s $config_file && -e $store_dir,
+		'add-extinbox created config + store');
+
+	# every added inbox must now carry the default boost in lei's config
+	my $lei_cfg = PublicInbox::Config->new($config_file);
+	my $check_boost = sub {
+		my ($ibx) = @_;
+		my $key = join('.', 'extinbox', $ibx->{inboxdir}, 'boost');
+		is($lei_cfg->{$key}, 0, "configured boost on $ibx->{name}");
+	};
+	$pi_cfg->each_inbox($check_boost);
+
+	$lei->('ls-extinbox');
+	like($out, qr/boost=0\n/s, 'ls-extinbox has output');
+};
+
my $test_lei_common = sub {
$test_help->();
$test_config->();
$test_init->();
+ $test_extinbox->();
};
my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};
next prev parent reply other threads:[~2020-12-18 12:09 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
2020-12-18 12:09 ` [PATCH 01/26] lei: FD-passing and IPC basics Eric Wong
2020-12-18 12:09 ` [PATCH 02/26] lei: proposed command-listing and options Eric Wong
2021-02-18 20:42 ` lei q --save-as=... requires too much thinking Eric Wong
2020-12-18 12:09 ` [PATCH 03/26] lei_store: local storage for Local Email Interface Eric Wong
2020-12-18 12:09 ` [PATCH 04/26] tests: more common JSON module loading Eric Wong
2020-12-18 12:09 ` [PATCH 05/26] lei: use spawn (vfork + execve) for lazy start Eric Wong
2020-12-18 12:09 ` [PATCH 06/26] lei: refine help/option parsing, implement "init" Eric Wong
2020-12-18 12:09 ` [PATCH 07/26] t/lei-oneshot: standalone oneshot (non-socket) test Eric Wong
2020-12-18 12:09 ` [PATCH 08/26] lei: ensure we run a restrictive umask Eric Wong
2020-12-18 12:09 ` [PATCH 09/26] lei: support `daemon-env' for modifying long-lived env Eric Wong
2020-12-18 12:09 ` [PATCH 10/26] lei_store: simplify git_epoch_max, slightly Eric Wong
2020-12-18 12:09 ` [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat Eric Wong
2020-12-18 12:09 ` [PATCH 12/26] rename LeiDaemon package to PublicInbox::LEI Eric Wong
2020-12-18 12:09 ` [PATCH 13/26] lei: support pass-through for `lei config' Eric Wong
2020-12-18 12:09 ` [PATCH 14/26] lei: help: show actual paths being operated on Eric Wong
2020-12-18 12:09 ` [PATCH 15/26] lei: rename $client => $self and bless Eric Wong
2020-12-18 12:09 ` [PATCH 16/26] lei: micro-optimize startup time Eric Wong
2020-12-18 12:09 ` [PATCH 17/26] lei_store: relax GIT_COMMITTER_IDENT check Eric Wong
2020-12-18 12:09 ` [PATCH 18/26] lei_store: keyword extraction from mbox and Maildir Eric Wong
2020-12-18 12:09 ` [PATCH 19/26] on_destroy: generic localized END Eric Wong
2020-12-18 12:09 ` [PATCH 20/26] lei: restore default __DIE__ handler for event loop Eric Wong
2020-12-18 12:09 ` [PATCH 21/26] lei: drop $SIG{__DIE__}, add oneshot fallbacks Eric Wong
2020-12-18 12:09 ` [PATCH 22/26] lei: start working on bash completion Eric Wong
2020-12-18 12:09 ` [PATCH 23/26] build: add lei.sh + "make symlink-install" target Eric Wong
2020-12-18 12:09 ` [PATCH 24/26] lei: support for -$DIGIT and -$SIG CLI switches Eric Wong
2020-12-18 12:09 ` [PATCH 25/26] lei: revise output routines Eric Wong
2020-12-18 12:09 ` Eric Wong [this message]
2020-12-18 20:23 ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
2020-12-27 20:02 ` [PATCH 27/26] lei_xsearch: cross-(inbox|extindex) search Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201218120950.23272-27-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).