about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-18 11:34:38 +0000
committer Eric Wong <e@80x24.org> 2020-12-19 09:32:08 +0000
commit 12583f45f29f3acd6cd704df9a7e5aaff5acc3f7 (patch)
tree 8980efdc7a97bf1b894900e5bb15c6f725aafdc0
parent 7ab46690f51a7f1f22299e4fd385a56e5bcddef7 (diff)
download public-inbox-12583f45f29f3acd6cd704df9a7e5aaff5acc3f7.tar.gz
They need to be indexed by MiscIdx, but MiscIdx
still needs more work to support faster config
loading when dealing with ~100K data sources.
-rw-r--r-- MANIFEST 1
-rw-r--r-- lib/PublicInbox/LEI.pm 19
-rw-r--r-- lib/PublicInbox/LeiExtinbox.pm 51
-rw-r--r-- t/lei.t 72
4 files changed, 130 insertions, 13 deletions
diff --git a/MANIFEST b/MANIFEST
index e2d4ef72..f0847e3c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -162,6 +162,7 @@ lib/PublicInbox/InboxWritable.pm
 lib/PublicInbox/Isearch.pm
 lib/PublicInbox/KQNotify.pm
 lib/PublicInbox/LEI.pm
+lib/PublicInbox/LeiExtinbox.pm
 lib/PublicInbox/LeiSearch.pm
 lib/PublicInbox/LeiStore.pm
 lib/PublicInbox/Linkify.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 97c5d91b..b254e2c5 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -8,7 +8,7 @@
 package PublicInbox::LEI;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::DS);
+use parent qw(PublicInbox::DS PublicInbox::LeiExtinbox);
 use Getopt::Long ();
 use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
 use Errno qw(EAGAIN ECONNREFUSED ENOENT);
@@ -79,12 +79,12 @@ our %CMD = ( # sorted in order of importance/use:
 
 'add-extinbox' => [ 'URL_OR_PATHNAME',
         'add/set priority of a publicinbox|extindex for extra matches',
-        qw(prio=i) ],
+        qw(boost=i quiet|q) ],
 'ls-extinbox' => [ '[FILTER...]', 'list publicinbox|extindex locations',
-        qw(format|f=s z local remote) ],
+        qw(format|f=s z|0 local remote quiet|q) ],
 'forget-extinbox' => [ '{URL_OR_PATHNAME|--prune}',
         'exclude further results from a publicinbox|extindex',
-        qw(prune) ],
+        qw(prune quiet|q) ],
 
 'ls-query' => [ '[FILTER...]', 'list saved search queries',
                 qw(name-only format|f=s z) ],
@@ -107,7 +107,7 @@ our %CMD = ( # sorted in order of importance/use:
 
 # code repos are used for `show' to solve blobs from patch mails
 'add-coderepo' => [ 'PATHNAME', 'add or set priority of a git code repo',
-        qw(prio=i) ],
+        qw(boost=i) ],
 'ls-coderepo' => [ '[FILTER_TERMS...]',
                 'list known code repos', qw(format|f=s z) ],
 'forget-coderepo' => [ 'PATHNAME',
@@ -197,7 +197,7 @@ my %OPTDESC = (
 'sort|s=s@' => [ 'VAL|internaldate,date,relevance,docid',
                 "order of results `--output'-dependent"],
 
-'prio=i' => 'priority of query source',
+'boost=i' => 'increase/decrease priority of results (default: 0)',
 
 'local' => 'limit operations to the local filesystem',
 'local!' => 'exclude results from the local filesystem',
@@ -217,8 +217,7 @@ my %OPTDESC = (
 'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
 'jobs:i' => 'set parallelism level',
 
-# xargs, env, use "-0", git(1) uses "-z".  Should we support z|0 everywhere?
-'z' => 'use NUL \\0 instead of newline (CR) to delimit lines',
+# xargs, env, use "-0", git(1) uses "-z".  We support z|0 everywhere
 'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
 
 # note: no "--ignore-environment" / "-i" support like env(1) since that
@@ -455,7 +454,9 @@ sub _lei_store ($;$) {
         $cfg->{-lei_store} //= do {
                 require PublicInbox::LeiStore;
                 PublicInbox::SearchIdx::load_xapian_writable();
-                defined(my $dir = $cfg->{'leistore.dir'}) or return;
+                my $dir = $cfg->{'leistore.dir'};
+                $dir //= _store_path($self->{env}) if $creat;
+                return unless $dir;
                 PublicInbox::LeiStore->new($dir, { creat => $creat });
         };
 }
diff --git a/lib/PublicInbox/LeiExtinbox.pm b/lib/PublicInbox/LeiExtinbox.pm
new file mode 100644
index 00000000..c2de7735
--- /dev/null
+++ b/lib/PublicInbox/LeiExtinbox.pm
@@ -0,0 +1,72 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# *-extinbox commands of lei
+# PublicInbox::LEI names this package as a parent class, so the subs
+# below are called as methods with the lei object as $self.
+package PublicInbox::LeiExtinbox;
+use strict;
+use v5.10.1;
+use parent qw(Exporter);
+use File::Spec; # canonpath() in lei_add_extinbox
+our @EXPORT = qw(lei_ls_extinbox lei_add_extinbox lei_forget_extinbox);
+
+# ls-extinbox: write one record per "extinbox.<location>" config section
+# to $self->{1}, formatted as "<location><OFS>boost=<N><ORS>".
+# OFS/ORS are NUL and NUL NUL when the "z" option is set, otherwise a
+# space and a newline.  Records are ordered by descending boost, ties
+# broken alphabetically by location.
+# @argv (FILTER...) is accepted but not used for filtering, yet.
+sub lei_ls_extinbox {
+        my ($self, @argv) = @_;
+        # result unused; the call is kept since it may load the store
+        # as a side effect (TODO confirm whether ls-extinbox needs it)
+        $self->_lei_store(0);
+        my $cfg = $self->_lei_cfg(0);
+        my $out = $self->{1};
+        my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+        my %boost;
+        # "// []": tolerate a config without any recorded sections
+        for my $sec (grep(/\Aextinbox\./, @{$cfg->{-section_order} // []})) {
+                my $loc = substr($sec, length('extinbox.'));
+                # a section lacking a boost key counts as boost 0, which
+                # avoids "uninitialized value" warnings in sort and print
+                $boost{$loc} = $cfg->{"$sec.boost"} // 0;
+        }
+        # the inner sort orders locations alphabetically; the stable outer
+        # sort then keeps that order among locations with equal boost
+        use sort 'stable';
+        for my $loc (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) {
+                # TODO: use miscidx and show docid so forget/set is easier
+                print {$out} $loc, $OFS, 'boost=', $boost{$loc}, $ORS;
+        }
+}
+
+# add-extinbox: store $url_or_dir in the lei config by setting
+# "extinbox.<location>.boost".  Anything which does not start with
+# http:// or https:// is treated as a pathname and passed through
+# File::Spec->canonpath first.  The boost is taken from the "boost"
+# option and defaults to 0.  Returns early without writing anything
+# when the config already has the same boost for that location.
+sub lei_add_extinbox {
+        my ($self, $url_or_dir) = @_;
+        my $cfg = $self->_lei_cfg(1);
+        if ($url_or_dir !~ m!\Ahttps?://!) {
+                $url_or_dir = File::Spec->canonpath($url_or_dir);
+        }
+        my $new_boost = $self->{opt}->{boost} // 0;
+        my $key = "extinbox.$url_or_dir.boost";
+        my $cur_boost = $cfg->{$key};
+        return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+        $self->lei_config($key, $new_boost);
+        my $stor = $self->_lei_store(1);
+        # TODO: add to MiscIdx
+        $stor->done;
+}
+
+# forget-extinbox: not implemented, yet; calling it does nothing
+sub lei_forget_extinbox {
+        # TODO
+}
+
+1;
diff --git a/t/lei.t b/t/lei.t
index 30f9d2b6..a95a0efc 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -7,17 +7,18 @@ use Test::More;
 use PublicInbox::TestCommon;
 use PublicInbox::Config;
 use File::Path qw(rmtree);
+require_git 2.6;
 require_mods(qw(json DBD::SQLite Search::Xapian));
 my $LEI = 'lei';
 my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
 my $lei = sub {
-        my ($cmd, $env, $opt) = @_;
+        my ($cmd, $env, $xopt) = @_;
         $out = $err = '';
         if (!ref($cmd)) {
-                ($env, $opt) = grep { (!defined) || ref } @_;
-                $cmd = [ grep { defined } @_ ];
+                ($env, $xopt) = grep { (!defined) || ref } @_;
+                $cmd = [ grep { defined && !ref } @_ ];
         }
-        run_script([$LEI, @$cmd], $env, $opt);
+        run_script([$LEI, @$cmd], $env, $xopt // $opt);
 };
 
 my ($home, $for_destroy) = tmpdir();
@@ -29,6 +30,8 @@ local $ENV{FOO} = 'BAR';
 mkdir "$home/xdg_run", 0700 or BAIL_OUT "mkdir: $!";
 my $home_trash = [ "$home/.local", "$home/.config" ];
 my $cleanup = sub { rmtree([@$home_trash, @_]) };
+my $config_file = "$home/.config/lei/config";
+my $store_dir = "$home/.local/share/lei";
 
 my $test_help = sub {
         ok(!$lei->([], undef, $opt), 'no args fails');
@@ -118,10 +121,71 @@ my $test_config = sub {
         ok(!-f "$home/config/f", 'no file created');
 };
 
+my $setup_publicinboxes = sub { # create and fill inboxes t1 (v1) and t2 (v2)
+        state $done = ''; # the $home for which setup already ran
+        return if $done eq $home; # only set up once per $home
+        use PublicInbox::InboxWritable;
+        for my $V (1, 2) { # one inbox per version number passed as -V
+                run_script([qw(-init -Lmedium), "-V$V", "t$V",
+                                '--newsgroup', "t.$V",
+                                "$home/t$V", "http://example.com/t$V",
+                                "t$V\@example.com" ]) or BAIL_OUT "init v$V";
+        }
+        my $cfg = PublicInbox::Config->new;
+        my $seen = 0; # number of messages added across all inboxes
+        $cfg->each_inbox(sub {
+                my ($ibx) = @_;
+                my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
+                my $V = $ibx->version;
+                my @eml = glob('t/*.eml');
+                push(@eml, 't/data/0001.patch') if $V == 2; # v2 only
+                for (@eml) {
+                        next if $_ eq 't/psgi_v2-old.eml'; # dup mid
+                        $im->add(eml_load($_)) or BAIL_OUT "v$V add $_";
+                        $seen++;
+                }
+                $im->done;
+                if ($V == 1) { # v1 gets an explicit -index run after import
+                        run_script(['-index', $ibx->{inboxdir}]) or
+                                BAIL_OUT 'index v1';
+                }
+        });
+        $done = $home;
+        $seen || BAIL_OUT 'no imports'; # abort if nothing was imported
+};
+
+my $test_extinbox = sub { # exercises ls-extinbox and add-extinbox
+        $setup_publicinboxes->();
+        $cleanup->(); # rmtree of @$home_trash: start without config/store
+        $lei->('ls-extinbox');
+        is($out.$err, '', 'ls-extinbox no output, yet');
+        ok(!-e $config_file && !-e $store_dir,
+                'nothing created by ls-extinbox');
+
+        my $cfg = PublicInbox::Config->new;
+        $cfg->each_inbox(sub { # add each inbox directory as an extinbox
+                my ($ibx) = @_;
+                ok($lei->(qw(add-extinbox -q), $ibx->{inboxdir}),
+                        'added extinbox');
+                is($out.$err, '', 'no output');
+        });
+        ok(-s $config_file && -e $store_dir,
+                'add-extinbox created config + store');
+        my $lcfg = PublicInbox::Config->new($config_file); # lei's own config
+        $cfg->each_inbox(sub { # each added inbox must have boost 0
+                my ($ibx) = @_;
+                is($lcfg->{"extinbox.$ibx->{inboxdir}.boost"}, 0,
+                        "configured boost on $ibx->{name}");
+        });
+        $lei->('ls-extinbox');
+        like($out, qr/boost=0\n/s, 'ls-extinbox has output');
+};
+
 my $test_lei_common = sub {
         $test_help->();
         $test_config->();
         $test_init->();
+        $test_extinbox->();
 };
 
 my $test_lei_oneshot = $ENV{TEST_LEI_ONESHOT};