user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 26/26] lei: extinbox: start implementing in config file
Date: Fri, 18 Dec 2020 12:09:50 +0000	[thread overview]
Message-ID: <20201218120950.23272-27-e@80x24.org> (raw)
In-Reply-To: <20201218120950.23272-1-e@80x24.org>

They need to be indexed by MiscIdx, but MiscIdx
still needs more work to support faster config
loading when dealing with ~100K data sources.
---
 lib/PublicInbox/LEI.pm         | 19 ++++-----
 lib/PublicInbox/LeiExtinbox.pm | 52 ++++++++++++++++++++++++
 t/lei.t                        | 72 ++++++++++++++++++++++++++++++++--
 3 files changed, 130 insertions(+), 13 deletions(-)
 create mode 100644 lib/PublicInbox/LeiExtinbox.pm

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 97c5d91b..b254e2c5 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -8,7 +8,7 @@
 package PublicInbox::LEI;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::DS);
+use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox);
 use Getopt::Long ();
 use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
 use Errno qw(EAGAIN ECONNREFUSED ENOENT);
@@ -79,12 +79,12 @@ our %CMD = ( # sorted in order of importance/use:
 
 'add-extinbox' => [ 'URL_OR_PATHNAME',
 	'add/set priority of a publicinbox|extindex for extra matches',
-	qw(prio=i) ],
+	qw(boost=i quiet|q) ],
 'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations',
-	qw(format|f=s z local remote) ],
+	qw(format|f=s z|0 local remote quiet|q) ],
 'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}',
 	'exclude further results from a publicinbox|extindex',
-	qw(prune) ],
+	qw(prune quiet|q) ],
 
 'ls-query' => [ '[FILTER...]', 'list saved search queries',
 		qw(name-only format|f=s z) ],
@@ -107,7 +107,7 @@ our %CMD = ( # sorted in order of importance/use:
 
 # code repos are used for `show' to solve blobs from patch mails
 'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
-	qw(prio=i) ],
+	qw(boost=i) ],
 'ls-coderepo' => [ '[FILTER_TERMS...]',
 		'list known code repos', qw(format|f=s z) ],
 'forget-coderepo' => [ 'PATHNAME',
@@ -197,7 +197,7 @@ my %OPTDESC = (
 'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid',
 		"order of results `--output'-dependent"],
 
-'prio=i' => 'priority of query source',
+'boost=i' => 'increase/decrease priority of results (default: 0)',
 
 'local' => 'limit operations to the local filesystem',
 'local!' => 'exclude results from the local filesystem',
@@ -217,8 +217,7 @@ my %OPTDESC = (
 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
 'jobs:i' => 'set parallelism level',
 
-# xargs, env, use "-0", git(1) uses "-z".  Should we support z|0 everywhere?
-'z' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+# xargs, env, use "-0", git(1) uses "-z".  We support z|0 everywhere
 'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
 
 # note: no "--ignore-environment" / "-i" support like env(1) since that
@@ -455,7 +454,9 @@ sub _lei_store ($;$) {
 	$cfg->{-lei_store} //= do {
 		require PublicInbox::LeiStore;
 		PublicInbox::SearchIdx::load_xapian_writable();
-		defined(my $dir = $cfg->{'leistore.dir'}) or return;
+		my $dir = $cfg->{'leistore.dir'};
+		$dir //= _store_path($self->{env}) if $creat;
+		return unless $dir;
 		PublicInbox::LeiStore->new($dir, { creat => $creat });
 	};
 }
diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm
new file mode 100644
index 00000000..2f52b115
--- /dev/null
+++ b/lib/PublicInbox/LeiExtinbox.pm
@@ -0,0 +1,52 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# *-extinbox commands of lei
+package PublicInbox::LeiExtinbox;
+use strict;
+use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox);
+
+# print each configured "extinbox.*" location to $self->{1}, highest boost first
+sub lei_ls_extinbox {
+	my ($self, @argv) = @_;
+	$self->_lei_store(0); # result not used yet (see TODO below)
+	my $cfg = $self->_lei_cfg(0);
+	my $out = $self->{1};
+	# --z/-0: NUL between fields, double NUL between records
+	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+	my %boost;
+	for my $sec (grep(/\Aextinbox\./, @{$cfg->{-section_order}})) {
+		my $loc = substr($sec, length('extinbox.'));
+		# a section lacking a "boost" key is treated as boost=0
+		$boost{$loc} = $cfg->{"$sec.boost"} // 0;
+	}
+	use sort 'stable';
+	# highest boost first, but stable for alphabetic tie break
+	for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) {
+		# TODO: use miscidx and show docid so forget/set is easier
+		print $out $_, $OFS, 'boost=', $boost{$_}, $ORS;
+	}
+}
+
+# record "extinbox.$url_or_dir.boost" via lei_config; --boost defaults to 0
+sub lei_add_extinbox {
+	my ($self, $url_or_dir) = @_;
+	my $cfg = $self->_lei_cfg(1);
+	require File::Spec; # this package does not load it anywhere else
+	$url_or_dir = File::Spec->canonpath($url_or_dir) if
+			$url_or_dir !~ m!\Ahttps?://!; # leave URLs untouched
+	my $new_boost = $self->{opt}->{boost} // 0;
+	my $key = "extinbox.$url_or_dir.boost";
+	my $cur_boost = $cfg->{$key};
+	return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+	$self->lei_config($key, $new_boost);
+	my $stor = $self->_lei_store(1); # TODO: add to MiscIdx
+	$stor->done;
+}
+
+sub lei_forget_extinbox {
+	# TODO: not implemented yet; the body is empty
+}
+
+1;
diff --git a/t/lei.t b/t/lei.t
index 30f9d2b6..a95a0efc 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -7,17 +7,18 @@ use Test::More;
 use PublicInbox::TestCommon;
 use PublicInbox::Config;
 use File::Path qw(rmtree);
+require_git 2.6;
 require_mods(qw(json DBD::SQLite Search::Xapian));
 my $LEI = 'lei';
 my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
 my $lei = sub {
-	my ($cmd, $env, $opt) = @_;
+	my ($cmd, $env, $xopt) = @_;
 	$out = $err = '';
 	if (!ref($cmd)) {
-		($env, $opt) = grep { (!defined) || ref } @_;
-		$cmd = [ grep { defined } @_ ];
+		($env, $xopt) = grep { (!defined) || ref } @_;
+		$cmd = [ grep { defined && !ref } @_ ];
 	}
-	run_script([$LEI, @$cmd], $env, $opt);
+	run_script([$LEI, @$cmd], $env, $xopt // $opt);
 };
 
 my ($home, $for_destroy) = tmpdir();
@@ -29,6 +30,8 @@ local $ENV{FOO} = 'BAR';
 mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
 my $home_trash = [ "$home/.local", "$home/.config" ];
 my $cleanup = sub { rmtree([@$home_trash, @_]) };
+my $config_file = "$home/.config/lei/config";
+my $store_dir = "$home/.local/share/lei";
 
 my $test_help = sub {
 	ok(!$lei->([], undef, $opt), 'no args fails');
@@ -118,10 +121,71 @@ my $test_config = sub {
 	ok(!-f "$home/config/f", 'no file created');
 };
 
+my $setup_publicinboxes = sub {
+	state $done = ''; # $home value the inboxes were last set up under
+	return if $done eq $home; # already set up for this $home
+	use PublicInbox::InboxWritable;
+	for my $V (1, 2) { # run -init once per inbox version: t1 and t2
+		run_script([qw(-init -Lmedium), "-V$V", "t$V",
+				'--newsgroup', "t.$V",
+				"$home/t$V", "http://example.com/t$V",
+				"t$V\@example.com" ]) or BAIL_OUT "init v$V";
+	}
+	my $cfg = PublicInbox::Config->new;
+	my $seen = 0; # messages added across all inboxes
+	$cfg->each_inbox(sub {
+		my ($ibx) = @_;
+		my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+		my $V = $ibx->version;
+		my @eml = glob('t/*.eml');
+		push(@eml, 't/data/0001.patch') if $V == 2;
+		for (@eml) {
+			next if $_ eq 't/psgi_v2-old.eml'; # dup mid
+			$im->add(eml_load($_)) or BAIL_OUT "v$V add $_";
+			$seen++;
+		}
+		$im->done;
+		if ($V == 1) { # only v1 gets an explicit -index run here
+			run_script(['-index', $ibx->{inboxdir}]) or
+				BAIL_OUT 'index v1';
+		}
+	});
+	$done = $home;
+	$seen || BAIL_OUT 'no imports'; # abort if nothing was added
+};
+
+my $test_extinbox = sub {
+	$setup_publicinboxes->();
+	$cleanup->(); # rmtree $home/.local and $home/.config first
+	$lei->('ls-extinbox');
+	is($out.$err, '', 'ls-extinbox no output, yet');
+	ok(!-e $config_file && !-e $store_dir,
+		'nothing created by ls-extinbox');
+
+	my $cfg = PublicInbox::Config->new;
+	$cfg->each_inbox(sub { # add every configured inbox as an extinbox
+		my ($ibx) = @_;
+		ok($lei->(qw(add-extinbox -q), $ibx->{inboxdir}),
+			'added extinbox');
+		is($out.$err, '', 'no output');
+	});
+	ok(-s $config_file && -e $store_dir,
+		'add-extinbox created config + store');
+	my $lcfg = PublicInbox::Config->new($config_file);
+	$cfg->each_inbox(sub { # no --boost was given, so expect 0
+		my ($ibx) = @_;
+		is($lcfg->{"extinbox.$ibx->{inboxdir}.boost"}, 0,
+			"configured boost on $ibx->{name}");
+	});
+	$lei->('ls-extinbox');
+	like($out, qr/boost=0\n/s, 'ls-extinbox has output');
+};
+
 my $test_lei_common = sub {
 	$test_help->();
 	$test_config->();
 	$test_init->();
+	$test_extinbox->();
 };
 
 my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};

  parent reply	other threads:[~2020-12-18 12:09 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-18 12:09 [PATCH 00/26] lei: basic UI + IPC work Eric Wong
2020-12-18 12:09 ` [PATCH 01/26] lei: FD-passing and IPC basics Eric Wong
2020-12-18 12:09 ` [PATCH 02/26] lei: proposed command-listing and options Eric Wong
2021-02-18 20:42   ` lei q --save-as=... requires too much thinking Eric Wong
2020-12-18 12:09 ` [PATCH 03/26] lei_store: local storage for Local Email Interface Eric Wong
2020-12-18 12:09 ` [PATCH 04/26] tests: more common JSON module loading Eric Wong
2020-12-18 12:09 ` [PATCH 05/26] lei: use spawn (vfork + execve) for lazy start Eric Wong
2020-12-18 12:09 ` [PATCH 06/26] lei: refine help/option parsing, implement "init" Eric Wong
2020-12-18 12:09 ` [PATCH 07/26] t/lei-oneshot: standalone oneshot (non-socket) test Eric Wong
2020-12-18 12:09 ` [PATCH 08/26] lei: ensure we run a restrictive umask Eric Wong
2020-12-18 12:09 ` [PATCH 09/26] lei: support `daemon-env' for modifying long-lived env Eric Wong
2020-12-18 12:09 ` [PATCH 10/26] lei_store: simplify git_epoch_max, slightly Eric Wong
2020-12-18 12:09 ` [PATCH 11/26] search: simplify initialization, add ->xdb_shards_flat Eric Wong
2020-12-18 12:09 ` [PATCH 12/26] rename LeiDaemon package to PublicInbox::LEI Eric Wong
2020-12-18 12:09 ` [PATCH 13/26] lei: support pass-through for `lei config' Eric Wong
2020-12-18 12:09 ` [PATCH 14/26] lei: help: show actual paths being operated on Eric Wong
2020-12-18 12:09 ` [PATCH 15/26] lei: rename $client => $self and bless Eric Wong
2020-12-18 12:09 ` [PATCH 16/26] lei: micro-optimize startup time Eric Wong
2020-12-18 12:09 ` [PATCH 17/26] lei_store: relax GIT_COMMITTER_IDENT check Eric Wong
2020-12-18 12:09 ` [PATCH 18/26] lei_store: keyword extraction from mbox and Maildir Eric Wong
2020-12-18 12:09 ` [PATCH 19/26] on_destroy: generic localized END Eric Wong
2020-12-18 12:09 ` [PATCH 20/26] lei: restore default __DIE__ handler for event loop Eric Wong
2020-12-18 12:09 ` [PATCH 21/26] lei: drop $SIG{__DIE__}, add oneshot fallbacks Eric Wong
2020-12-18 12:09 ` [PATCH 22/26] lei: start working on bash completion Eric Wong
2020-12-18 12:09 ` [PATCH 23/26] build: add lei.sh + "make symlink-install" target Eric Wong
2020-12-18 12:09 ` [PATCH 24/26] lei: support for -$DIGIT and -$SIG CLI switches Eric Wong
2020-12-18 12:09 ` [PATCH 25/26] lei: revise output routines Eric Wong
2020-12-18 12:09 ` Eric Wong [this message]
2020-12-18 20:23   ` [PATCH 26/26] lei: extinbox: start implementing in config file Eric Wong
2020-12-27 20:02   ` [PATCH 27/26] lei_xsearch: cross-(inbox|extindex) search Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201218120950.23272-27-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).