From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 11/12] lei: forget-external support with canonicalization
Date: Thu, 21 Jan 2021 19:46:23 +0000 [thread overview]
Message-ID: <20210121194624.32002-12-e@80x24.org> (raw)
In-Reply-To: <20210121194624.32002-1-e@80x24.org>
For proper matching, we'll do a better job of canonicalizing
URLs and path names. Of course, users may edit
the file outside of lei, so ensure we try both the canonicalized
and as-is forms provided by the user.
I also don't think we'll need to store externals info in
MiscIdx; just the config file is fine.
---
MANIFEST | 1 +
lib/PublicInbox/LEI.pm | 24 ++++++++++-----
lib/PublicInbox/LeiExternal.pm | 54 +++++++++++++++++++++++++++-------
t/lei.t | 9 ++++++
t/lei_external.t | 18 ++++++++++++
5 files changed, 88 insertions(+), 18 deletions(-)
create mode 100644 t/lei_external.t
diff --git a/MANIFEST b/MANIFEST
index 0de1de4a..ddee1539 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -339,6 +339,7 @@ t/kqnotify.t
t/lei-oneshot.t
t/lei.t
t/lei_dedupe.t
+t/lei_external.t
t/lei_overview.t
t/lei_store.t
t/lei_to_mail.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 37b45a00..9c3d7279 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -21,7 +21,7 @@ use PublicInbox::Config;
use PublicInbox::Syscall qw(SFD_NONBLOCK EPOLLIN EPOLLET);
use PublicInbox::Sigfd;
use PublicInbox::DS qw(now dwaitpid);
-use PublicInbox::Spawn qw(spawn run_die popen_rd);
+use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::OnDestroy;
use Text::Wrap qw(wrap);
use File::Path qw(mkpath);
@@ -95,7 +95,7 @@ our %CMD = ( # sorted in order of importance/use:
qw(boost=i quiet|q) ],
'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations',
qw(format|f=s z|0 local remote quiet|q) ],
-'forget-external' => [ '{URL_OR_PATHNAME|--prune}',
+'forget-external' => [ 'URL_OR_PATHNAME...|--prune',
'exclude further results from a publicinbox|extindex',
qw(prune quiet|q) ],
@@ -114,7 +114,7 @@ our %CMD = ( # sorted in order of importance/use:
"exclude message(s) on stdin from `q' search results",
qw(stdin| oid=s exact by-mid|mid:s quiet|q) ],
-'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}',
+'purge-mailsource' => [ 'URL_OR_PATHNAME|--all',
'remove imported messages from IMAP, Maildirs, and MH',
qw(exact! all jobs:i indexed) ],
@@ -137,7 +137,7 @@ our %CMD = ( # sorted in order of importance/use:
'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
qw(prune) ],
-'import' => [ '{URL_OR_PATHNAME|--stdin}',
+'import' => [ 'URL_OR_PATHNAME|--stdin',
'one-shot import/update from URL or filesystem',
qw(stdin| offset=i recursive|r exclude=s include=s !flags),
],
@@ -468,6 +468,7 @@ sub optparse ($$$) {
last;
} # else continue looping
}
+ last if $ok;
my $last = pop @or;
$err = join(', ', @or) . " or $last must be set";
} else {
@@ -547,16 +548,23 @@ sub lei_mark {
my ($self, @argv) = @_;
}
-sub lei_config {
+sub _config {
my ($self, @argv) = @_;
- $self->{opt}->{'config-file'} and return fail $self,
- "config file switches not supported by `lei config'";
my $env = $self->{env};
delete local $env->{GIT_CONFIG};
+ delete local $ENV{GIT_CONFIG};
my $cfg = _lei_cfg($self, 1);
my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ];
my %rdr = map { $_ => $self->{$_} } (0..2);
- run_die($cmd, $env, \%rdr);
+ waitpid(spawn($cmd, $env, \%rdr), 0);
+}
+
+sub lei_config {
+ my ($self, @argv) = @_;
+ $self->{opt}->{'config-file'} and return fail $self,
+ "config file switches not supported by `lei config'";
+ _config(@_);
+ x_it($self, $?) if $?;
}
sub lei_init {
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index 64faf5a0..21071058 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -7,6 +7,7 @@ use strict;
use v5.10.1;
use parent qw(Exporter);
our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
+use PublicInbox::Config;
sub _externals_each {
my ($self, $cb, @arg) = @_;
@@ -30,7 +31,6 @@ sub _externals_each {
sub lei_ls_external {
my ($self, @argv) = @_;
- my $stor = $self->_lei_store(0);
my $out = $self->{1};
my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
$self->_externals_each(sub {
@@ -39,24 +39,58 @@ sub lei_ls_external {
});
}
+sub _canonicalize {
+ my ($location) = @_;
+ if ($location !~ m!\Ahttps?://!) {
+ PublicInbox::Config::rel2abs_collapsed($location);
+ } else {
+ require URI;
+ my $uri = URI->new($location)->canonical;
+ my $path = $uri->path . '/';
+ $path =~ tr!/!/!s; # squeeze redundant '/'
+ $uri->path($path);
+ $uri->as_string;
+ }
+}
+
sub lei_add_external {
- my ($self, $url_or_dir) = @_;
+ my ($self, $location) = @_;
my $cfg = $self->_lei_cfg(1);
- if ($url_or_dir !~ m!\Ahttps?://!) {
- $url_or_dir = File::Spec->canonpath($url_or_dir);
- }
my $new_boost = $self->{opt}->{boost} // 0;
- my $key = "external.$url_or_dir.boost";
+ $location = _canonicalize($location);
+ my $key = "external.$location.boost";
my $cur_boost = $cfg->{$key};
return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
$self->lei_config($key, $new_boost);
- my $stor = $self->_lei_store(1);
- # TODO: add to MiscIdx
- $stor->done;
+ $self->_lei_store(1)->done; # just create the store
}
sub lei_forget_external {
- # TODO
+ my ($self, @locations) = @_;
+ my $cfg = $self->_lei_cfg(1);
+ my $quiet = $self->{opt}->{quiet};
+ for my $loc (@locations) {
+ my (@unset, @not_found);
+ for my $l ($loc, _canonicalize($loc)) {
+ my $key = "external.$l.boost";
+ delete($cfg->{$key});
+ $self->_config('--unset', $key);
+ if ($? == 0) {
+ push @unset, $key;
+ } elsif (($? >> 8) == 5) {
+ push @not_found, $key;
+ } else {
+ $self->err("# --unset $key error");
+ return $self->x_it($?);
+ }
+ }
+ if (@unset) {
+ next if $quiet;
+ $self->err("# $_ unset") for @unset;
+ } elsif (@not_found) {
+ $self->err("# $_ not found") for @not_found;
+ } # else { already exited
+ }
}
1;
diff --git a/t/lei.t b/t/lei.t
index ef820fe3..50ad2bb1 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -180,6 +180,15 @@ my $test_external = sub {
});
$lei->('ls-external');
like($out, qr/boost=0\n/s, 'ls-external has output');
+ ok($lei->(qw(add-external -q https://EXAMPLE.com/ibx)), 'add remote');
+ is($err, '', 'no warnings after add-external');
+ $lei->('ls-external');
+ like($out, qr!https://example\.com/ibx/!s, 'added canonical URL');
+ is($err, '', 'no warnings on ls-external');
+ ok($lei->(qw(forget-external -q https://EXAMPLE.com/ibx)),
+ 'forget');
+ $lei->('ls-external');
+ unlike($out, qr!https://example\.com/ibx/!s, 'removed canonical URL');
ok(!$lei->(qw(q s:prefix -o /dev/null -f maildir)), 'bad maildir');
like($err, qr!/dev/null exists and is not a directory!,
diff --git a/t/lei_external.t b/t/lei_external.t
new file mode 100644
index 00000000..1f0048a1
--- /dev/null
+++ b/t/lei_external.t
@@ -0,0 +1,18 @@
+#!perl -w
+use strict;
+use v5.10.1;
+use Test::More;
+my $cls = 'PublicInbox::LeiExternal';
+require_ok $cls;
+my $canon = $cls->can('_canonicalize');
+my $exp = 'https://example.com/my-inbox/';
+is($canon->('https://example.com/my-inbox'), $exp, 'trailing slash added');
+is($canon->('https://example.com/my-inbox//'), $exp, 'trailing slash removed');
+is($canon->('https://example.com//my-inbox/'), $exp, 'leading slash removed');
+is($canon->('https://EXAMPLE.com/my-inbox/'), $exp, 'lowercased');
+is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent',
+ 'non-existent pathname canonicalized');
+is($canon->('/this//path/'), '/this/path', 'extra slashes gone');
+is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved');
+
+done_testing;
next prev parent reply other threads:[~2021-01-21 19:46 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-21 19:46 [PATCH 00/12] lei: another dump Eric Wong
2021-01-21 19:46 ` [PATCH 01/12] lei_overview: rename {relevance} => {pct} Eric Wong
2021-01-21 19:46 ` [PATCH 02/12] lei q: retrieve keywords for local, non-external messages Eric Wong
2021-01-21 19:46 ` [PATCH 03/12] lei_xsearch: eliminate some unused, commented-out code Eric Wong
2021-01-21 19:46 ` [PATCH 04/12] lei: show {pct} and {oid} in From_ lines and filenames Eric Wong
2021-01-21 19:46 ` [PATCH 05/12] lei: fix inadvertant FD sharing Eric Wong
2021-01-21 19:46 ` [PATCH 06/12] lei_to_mail: avoid segfault on exit Eric Wong
2021-01-21 19:46 ` [PATCH 07/12] lei: oneshot: use client $io[2] for placeholder Eric Wong
2021-01-21 19:46 ` [PATCH 08/12] lei: remove INT/QUIT/TERM handlers, fix daemon EOF Eric Wong
2021-01-21 19:46 ` [PATCH 09/12] lei_xsearch: reduce reference paths to lxs Eric Wong
2021-01-21 19:46 ` [PATCH 10/12] lei: remove @TO_CLOSE_ATFORK_CHILD Eric Wong
2021-01-21 19:46 ` Eric Wong [this message]
2021-01-21 19:46 ` [PATCH 12/12] lei forget-external: bash completion support Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210121194624.32002-12-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).