From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B283920078 for ; Wed, 3 Feb 2021 08:11:44 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/11] lei q: -I/--exclude/--only support globs and basenames Date: Tue, 2 Feb 2021 22:11:38 -1000 Message-Id: <20210203081143.24424-7-e@80x24.org> In-Reply-To: <20210203081143.24424-1-e@80x24.org> References: <20210203081143.24424-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We can do basename matching when it's unambiguous. Since '*?[]' characters are rare in URLs and pathnames, we'll do glob matching by default and add a (curl-inspired) --globoff/-g option to disable globbing. And fix --exclude while we're at it. --- lib/PublicInbox/LEI.pm | 3 ++- lib/PublicInbox/LeiExternal.pm | 38 +++++++++++++++++++++++++++++++++- lib/PublicInbox/LeiQuery.pm | 14 ++++++++----- 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 05a39cad..3cb7a327 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -104,7 +104,7 @@ our %CMD = ( # sorted in order of importance/use: 'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw( save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a sort|s=s reverse|r offset=i remote! local! external! 
pretty - include|I=s@ exclude=s@ only=s@ jobs|j=s + include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q received-after=s received-before=s sent-after=s sent-since=s), PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], @@ -201,6 +201,7 @@ my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ]; my %OPTDESC = ( 'help|h' => 'show this built-in help', 'quiet|q' => 'be quiet', +'globoff|g' => "do not match locations using '*?' wildcards and '[]' ranges", 'verbose|v' => 'be more verbose', 'solve!' => 'do not attempt to reconstruct blobs from emails', 'torsocks=s' => ['auto|no|yes', diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index 3853cfc1..6b4c7fb0 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -39,7 +39,7 @@ sub lei_ls_external { } sub ext_canonicalize { - my ($location) = $_[-1]; + my ($location) = @_; if ($location !~ m!\Ahttps?://!) { PublicInbox::Config::rel2abs_collapsed($location); } else { @@ -52,6 +52,42 @@ sub ext_canonicalize { } } +my %patmap = ('*' => '[^/]*?', '?' 
=> '[^/]', '[' => '[', ']' => ']'); +sub glob2pat { + my ($glob) = @_; + $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge; + $glob; +} + +sub get_externals { + my ($self, $loc, $exclude) = @_; + return (ext_canonicalize($loc)) if -e $loc; + + my @m; + my @cur = externals_each($self); + my $do_glob = !$self->{opt}->{globoff}; # glob by default + if ($do_glob && ($loc =~ /[\*\?]/s || $loc =~ /\[.*\]/s)) { + my $re = glob2pat($loc); + @m = grep(m!$re!, @cur); + return @m if scalar(@m); + } elsif (index($loc, '/') < 0) { # exact basename match: + @m = grep(m!/\Q$loc\E/?\z!, @cur); + return @m if scalar(@m) == 1; + } elsif ($exclude) { # URL, maybe: + my $canon = ext_canonicalize($loc); + @m = grep(m!\A\Q$canon\E\z!, @cur); + return @m if scalar(@m) == 1; + } else { # URL: + return (ext_canonicalize($loc)); + } + if (scalar(@m) == 0) { + $self->fail("`$loc' is unknown"); + } else { + $self->fail("`$loc' is ambiguous:\n", map { "\t$_\n" } @m); + } + (); +} + sub lei_add_external { my ($self, $location) = @_; my $cfg = $self->_lei_cfg(1); diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 72a67c24..10b8d6fa 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -31,17 +31,21 @@ sub lei_q { } if (@only) { for my $loc (@only) { - $lxs->prepare_external($self->ext_canonicalize($loc)); + my @loc = $self->get_externals($loc) or return; + $lxs->prepare_external($_) for @loc; } } else { for my $loc (@{$opt->{include} // []}) { - $lxs->prepare_external($self->ext_canonicalize($loc)); + my @loc = $self->get_externals($loc) or return; + $lxs->prepare_external($_) for @loc; } # --external is enabled by default, but allow --no-external if ($opt->{external} //= 1) { - my %x = map {; - ($self->ext_canonicalize($_), 1) - } @{$self->{exclude} // []}; + my %x; + for my $loc (@{$opt->{exclude} // []}) { + my @l = $self->get_externals($loc, 1) or return; + $x{$_} = 1 for @l; + } my $ne = $self->externals_each(\&prep_ext, $lxs, \%x); $opt->{remote} //= 
!($lxs->locals - $opt->{'local'}); if ($opt->{'local'}) {