From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 972771FF9C for ; Thu, 21 Jan 2021 19:46:25 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 11/12] lei: forget-external support with canonicalization Date: Thu, 21 Jan 2021 19:46:23 +0000 Message-Id: <20210121194624.32002-12-e@80x24.org> In-Reply-To: <20210121194624.32002-1-e@80x24.org> References: <20210121194624.32002-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: For proper matching, we'll do a better job of canonicalizing URLs and path names. Of course, users may edit the file outside of lei, so ensure we try both the canonicalized form and the as-is form provided by the user. I also don't think we'll need to store externals info in MiscIdx; just the config file is fine. 
--- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 24 ++++++++++----- lib/PublicInbox/LeiExternal.pm | 54 +++++++++++++++++++++++++++------- t/lei.t | 9 ++++++ t/lei_external.t | 18 ++++++++++++ 5 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 t/lei_external.t diff --git a/MANIFEST b/MANIFEST index 0de1de4a..ddee1539 100644 --- a/MANIFEST +++ b/MANIFEST @@ -339,6 +339,7 @@ t/kqnotify.t t/lei-oneshot.t t/lei.t t/lei_dedupe.t +t/lei_external.t t/lei_overview.t t/lei_store.t t/lei_to_mail.t diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 37b45a00..9c3d7279 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -21,7 +21,7 @@ use PublicInbox::Config; use PublicInbox::Syscall qw(SFD_NONBLOCK EPOLLIN EPOLLET); use PublicInbox::Sigfd; use PublicInbox::DS qw(now dwaitpid); -use PublicInbox::Spawn qw(spawn run_die popen_rd); +use PublicInbox::Spawn qw(spawn popen_rd); use PublicInbox::OnDestroy; use Text::Wrap qw(wrap); use File::Path qw(mkpath); @@ -95,7 +95,7 @@ our %CMD = ( # sorted in order of importance/use: qw(boost=i quiet|q) ], 'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations', qw(format|f=s z|0 local remote quiet|q) ], -'forget-external' => [ '{URL_OR_PATHNAME|--prune}', +'forget-external' => [ 'URL_OR_PATHNAME...|--prune', 'exclude further results from a publicinbox|extindex', qw(prune quiet|q) ], @@ -114,7 +114,7 @@ our %CMD = ( # sorted in order of importance/use: "exclude message(s) on stdin from `q' search results", qw(stdin| oid=s exact by-mid|mid:s quiet|q) ], -'purge-mailsource' => [ '{URL_OR_PATHNAME|--all}', +'purge-mailsource' => [ 'URL_OR_PATHNAME|--all', 'remove imported messages from IMAP, Maildirs, and MH', qw(exact! 
all jobs:i indexed) ], @@ -137,7 +137,7 @@ our %CMD = ( # sorted in order of importance/use: 'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch', qw(prune) ], -'import' => [ '{URL_OR_PATHNAME|--stdin}', +'import' => [ 'URL_OR_PATHNAME|--stdin', 'one-shot import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include=s !flags), ], @@ -468,6 +468,7 @@ sub optparse ($$$) { last; } # else continue looping } + last if $ok; my $last = pop @or; $err = join(', ', @or) . " or $last must be set"; } else { @@ -547,16 +548,23 @@ sub lei_mark { my ($self, @argv) = @_; } -sub lei_config { +sub _config { my ($self, @argv) = @_; - $self->{opt}->{'config-file'} and return fail $self, - "config file switches not supported by `lei config'"; my $env = $self->{env}; delete local $env->{GIT_CONFIG}; + delete local $ENV{GIT_CONFIG}; my $cfg = _lei_cfg($self, 1); my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ]; my %rdr = map { $_ => $self->{$_} } (0..2); - run_die($cmd, $env, \%rdr); + waitpid(spawn($cmd, $env, \%rdr), 0); +} + +sub lei_config { + my ($self, @argv) = @_; + $self->{opt}->{'config-file'} and return fail $self, + "config file switches not supported by `lei config'"; + _config(@_); + x_it($self, $?) if $?; } sub lei_init { diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index 64faf5a0..21071058 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -7,6 +7,7 @@ use strict; use v5.10.1; use parent qw(Exporter); our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external); +use PublicInbox::Config; sub _externals_each { my ($self, $cb, @arg) = @_; @@ -30,7 +31,6 @@ sub _externals_each { sub lei_ls_external { my ($self, @argv) = @_; - my $stor = $self->_lei_store(0); my $out = $self->{1}; my ($OFS, $ORS) = $self->{opt}->{z} ? 
("\0", "\0\0") : (" ", "\n"); $self->_externals_each(sub { @@ -39,24 +39,58 @@ sub lei_ls_external { }); } +sub _canonicalize { + my ($location) = @_; + if ($location !~ m!\Ahttps?://!) { + PublicInbox::Config::rel2abs_collapsed($location); + } else { + require URI; + my $uri = URI->new($location)->canonical; + my $path = $uri->path . '/'; + $path =~ tr!/!/!s; # squeeze redundant '/' + $uri->path($path); + $uri->as_string; + } +} + sub lei_add_external { - my ($self, $url_or_dir) = @_; + my ($self, $location) = @_; my $cfg = $self->_lei_cfg(1); - if ($url_or_dir !~ m!\Ahttps?://!) { - $url_or_dir = File::Spec->canonpath($url_or_dir); - } my $new_boost = $self->{opt}->{boost} // 0; - my $key = "external.$url_or_dir.boost"; + $location = _canonicalize($location); + my $key = "external.$location.boost"; my $cur_boost = $cfg->{$key}; return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent $self->lei_config($key, $new_boost); - my $stor = $self->_lei_store(1); - # TODO: add to MiscIdx - $stor->done; + $self->_lei_store(1)->done; # just create the store } sub lei_forget_external { - # TODO + my ($self, @locations) = @_; + my $cfg = $self->_lei_cfg(1); + my $quiet = $self->{opt}->{quiet}; + for my $loc (@locations) { + my (@unset, @not_found); + for my $l ($loc, _canonicalize($loc)) { + my $key = "external.$l.boost"; + delete($cfg->{$key}); + $self->_config('--unset', $key); + if ($? == 0) { + push @unset, $key; + } elsif (($? 
>> 8) == 5) { + push @not_found, $key; + } else { + $self->err("# --unset $key error"); + return $self->x_it($?); + } + } + if (@unset) { + next if $quiet; + $self->err("# $_ unset") for @unset; + } elsif (@not_found) { + $self->err("# $_ not found") for @not_found; + } # else { already exited + } } 1; diff --git a/t/lei.t b/t/lei.t index ef820fe3..50ad2bb1 100644 --- a/t/lei.t +++ b/t/lei.t @@ -180,6 +180,15 @@ my $test_external = sub { }); $lei->('ls-external'); like($out, qr/boost=0\n/s, 'ls-external has output'); + ok($lei->(qw(add-external -q https://EXAMPLE.com/ibx)), 'add remote'); + is($err, '', 'no warnings after add-external'); + $lei->('ls-external'); + like($out, qr!https://example\.com/ibx/!s, 'added canonical URL'); + is($err, '', 'no warnings on ls-external'); + ok($lei->(qw(forget-external -q https://EXAMPLE.com/ibx)), + 'forget'); + $lei->('ls-external'); + unlike($out, qr!https://example\.com/ibx/!s, 'removed canonical URL'); ok(!$lei->(qw(q s:prefix -o /dev/null -f maildir)), 'bad maildir'); like($err, qr!/dev/null exists and is not a directory!, diff --git a/t/lei_external.t b/t/lei_external.t new file mode 100644 index 00000000..1f0048a1 --- /dev/null +++ b/t/lei_external.t @@ -0,0 +1,18 @@ +#!perl -w +use strict; +use v5.10.1; +use Test::More; +my $cls = 'PublicInbox::LeiExternal'; +require_ok $cls; +my $canon = $cls->can('_canonicalize'); +my $exp = 'https://example.com/my-inbox/'; +is($canon->('https://example.com/my-inbox'), $exp, 'trailing slash added'); +is($canon->('https://example.com/my-inbox//'), $exp, 'trailing slash removed'); +is($canon->('https://example.com//my-inbox/'), $exp, 'leading slash removed'); +is($canon->('https://EXAMPLE.com/my-inbox/'), $exp, 'lowercased'); +is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent', + 'non-existent pathname canonicalized'); +is($canon->('/this//path/'), '/this/path', 'extra slashes gone'); +is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved'); + +done_testing;