diff options
Diffstat (limited to 'lib/PublicInbox/LeiExternal.pm')
-rw-r--r-- | lib/PublicInbox/LeiExternal.pm | 218 |
1 files changed, 27 insertions, 191 deletions
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index f8e610ca..31b9bd1e 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -1,11 +1,11 @@ -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # *-external commands of lei package PublicInbox::LeiExternal; use strict; use v5.10.1; -use PublicInbox::Config; +use PublicInbox::Config qw(glob2re); sub externals_each { my ($self, $cb, @arg) = @_; @@ -31,7 +31,7 @@ sub externals_each { } sub ext_canonicalize { - my ($location) = @_; + my $location = $_[-1]; # $_[0] may be $lei if ($location !~ m!\Ahttps?://!) { PublicInbox::Config::rel2abs_collapsed($location); } else { @@ -44,40 +44,6 @@ sub ext_canonicalize { } } -# TODO: we will probably extract glob2re into a separate module for -# PublicInbox::Filter::Base and maybe other places -my %re_map = ( '*' => '[^/]*?', '?' => '[^/]', - '[' => '[', ']' => ']', ',' => ',' ); - -sub glob2re { - my $re = $_[-1]; - my $p = ''; - my $in_bracket = 0; - my $qm = 0; - my $schema_host_port = ''; - - # don't glob URL-looking things that look like IPv6 - if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) { - $schema_host_port = quotemeta $1; # "http://[::1]:1234" - } - my $changes = ($re =~ s!(.)! - $re_map{$p eq '\\' ? '' : do { - if ($1 eq '[') { ++$in_bracket } - elsif ($1 eq ']') { --$in_bracket } - elsif ($1 eq ',') { ++$qm } # no change - $p = $1; - }} // do { - $p = $1; - ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p") - }!sge); - # bashism (also supported by curl): {a,b,c} => (a|b|c) - $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/ - (my $in_braces = $2) =~ tr!,!|!; - $1."($in_braces)"; - /sge); - ($changes - $qm) ? $schema_host_port.$re : undef; -} - # get canonicalized externals list matching $loc # $is_exclude denotes it's for --exclude # otherwise it's for --only/--include is assumed @@ -88,7 +54,7 @@ sub get_externals { my @cur = externals_each($self); my $do_glob = !$self->{opt}->{globoff}; # glob by default if ($do_glob && (my $re = glob2re($loc))) { - @m = grep(m!$re!, @cur); + @m = grep(m!$re/?\z!, @cur); return @m if scalar(@m); } elsif (index($loc, '/') < 0) { # exact basename match: @m = grep(m!/\Q$loc\E/?\z!, @cur); @@ -101,183 +67,53 @@ sub get_externals { return (ext_canonicalize($loc)); } if (scalar(@m) == 0) { - $self->fail("`$loc' is unknown"); - } else { - $self->fail("`$loc' is ambiguous:\n", map { "\t$_\n" } @m); - } - (); -} - -# TODO: does this need JSON output? -sub lei_ls_external { - my ($self, $filter) = @_; - my $opt = $self->{opt}; - my $do_glob = !$opt->{globoff}; # glob by default - my ($OFS, $ORS) = $opt->{z} ? ("\0", "\0\0") : (" ", "\n"); - $filter //= '*'; - my $re = $do_glob ? glob2re($filter) : undef; - $re //= index($filter, '/') < 0 ? - qr!/\Q$filter\E/?\z! : # exact basename match - qr/\Q$filter\E/; # grep -F semantics - my @ext = externals_each($self, my $boost = {}); - @ext = $opt->{'invert-match'} ? grep(!/$re/, @ext) - : grep(/$re/, @ext); - if ($opt->{'local'} && !$opt->{remote}) { - @ext = grep(!m!\A[a-z\+]+://!, @ext); - } elsif ($opt->{remote} && !$opt->{'local'}) { - @ext = grep(m!\A[a-z\+]+://!, @ext); - } - for my $loc (@ext) { - $self->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS); - } -} - -sub add_external_finish { - my ($self, $location) = @_; - my $cfg = $self->_lei_cfg(1); - my $new_boost = $self->{opt}->{boost} // 0; - my $key = "external.$location.boost"; - my $cur_boost = $cfg->{$key}; - return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent - $self->_config($key, $new_boost); -} - -sub lei_add_external { - my ($self, $location) = @_; - my $opt = $self->{opt}; - my $mirror = $opt->{mirror} // do { - my @fail; - for my $sw ($self->index_opt, $self->curl_opt, - qw(no-torsocks torsocks inbox-version)) { - my ($f) = (split(/|/, $sw, 2))[0]; - next unless defined $opt->{$f}; - $f = length($f) == 1 ? "-$f" : "--$f"; - push @fail, $f; - } - if (scalar(@fail) == 1) { - return $self->("@fail requires --mirror"); - } elsif (@fail) { - my $last = pop @fail; - my $fail = join(', ', @fail); - return $self->("@fail and $last require --mirror"); - } - undef; - }; - my $new_boost = $opt->{boost} // 0; - $location = ext_canonicalize($location); - if (defined($mirror) && -d $location) { - $self->fail(<<""); # TODO: did you mean "update-external?" ---mirror destination `$location' already exists - - } elsif (-d $location) { - index($location, "\n") >= 0 and - return $self->fail("`\\n' not allowed in `$location'"); - } - if ($location !~ m!\Ahttps?://! && !-d $location) { - $mirror // return $self->fail("$location not a directory"); - index($location, "\n") >= 0 and - return $self->fail("`\\n' not allowed in `$location'"); - $mirror = ext_canonicalize($mirror); - require PublicInbox::LeiMirror; - PublicInbox::LeiMirror->start($self, $mirror => $location); + die "`$loc' is unknown\n"; } else { - add_external_finish($self, $location); + die("`$loc' is ambiguous:\n", map { "\t$_\n" } @m, "\n"); } } -sub lei_forget_external { - my ($self, @locations) = @_; - my $cfg = $self->_lei_cfg(1); - my $quiet = $self->{opt}->{quiet}; - my %seen; - for my $loc (@locations) { - my (@unset, @not_found); - for my $l ($loc, ext_canonicalize($loc)) { - next if $seen{$l}++; - my $key = "external.$l.boost"; - delete($cfg->{$key}); - $self->_config('--unset', $key); - if ($? == 0) { - push @unset, $l; - } elsif (($? >> 8) == 5) { - push @not_found, $l; - } else { - $self->err("# --unset $key error"); - return $self->x_it($?); - } - } - if (@unset) { - next if $quiet; - $self->err("# $_ gone") for @unset; - } elsif (@not_found) { - $self->err("# $_ not found") for @not_found; - } # else { already exited +sub canonicalize_excludes { + my ($lei, $excludes) = @_; + my %x; + for my $loc (@$excludes) { + my @l = get_externals($lei, $loc, 1); + $x{$_} = 1 for @l; } + \%x; } # returns an anonymous sub which returns an array of potential results sub complete_url_prepare { - my $argv = $_[-1]; - # Workaround bash word-splitting URLs to ['https', ':', '//' ...] - # Maybe there's a better way to go about this in - # contrib/completion/lei-completion.bash - my $re = ''; - my $cur = pop(@$argv) // ''; + my $argv = $_[-1]; # $_[0] may be $lei + # Workaround bash default COMP_WORDBREAKS splitting URLs to + # ['https', ':', '//', ...]. COMP_WORDBREAKS is global for all + # completions loaded, not just ours, so we can't change it. + # cf. contrib/completion/lei-completion.bash + my ($pfx, $cur) = ('', pop(@$argv) // ''); if (@$argv) { my @x = @$argv; - if ($cur eq ':' && @x) { + if ($cur =~ /\A[:;=]\z/) { # COMP_WORDBREAKS + URL union push @x, $cur; $cur = ''; } - while (@x > 2 && $x[0] !~ /\A(?:http|nntp|imap)s?\z/i && - $x[1] ne ':') { - shift @x; + while (@x && $pfx !~ m!\A(?: (?:[\+\-]?(?:L|kw):) | + (?:(?:imap|nntp|http)s?:) | + (?:--\w?\z)|(?:-\w?\z) )!x) { + $pfx = pop(@x).$pfx; } - if (@x >= 2) { # qw(https : hostname : 443) or qw(http :) - $re = join('', @x); - } else { # just filter out the flags and hope for the best - $re = join('', grep(!/^-/, @$argv)); - } - $re = quotemeta($re); } + my $re = qr!\A\Q$pfx\E(\Q$cur\E.*)!; my $match_cb = sub { # the "//;" here (for AUTH=ANONYMOUS) interacts badly with # bash tab completion, strip it out for now since our commands # work w/o it. Not sure if there's a better solution... $_[0] =~ s!//;AUTH=ANONYMOUS\@!//!i; - $_[0] =~ s!;!\\;!g; # only return the part specified on the CLI # don't duplicate if already 100% completed - $_[0] =~ /\A$re(\Q$cur\E.*)/ ? ($cur eq $1 ? () : $1) : () + $_[0] =~ $re ? ($cur eq $1 ? () : $1) : () }; - wantarray ? ($re, $cur, $match_cb) : $match_cb; -} - -# shell completion helper called by lei__complete -sub _complete_forget_external { - my ($self, @argv) = @_; - my $cfg = $self->_lei_cfg; - my ($cur, $re, $match_cb) = complete_url_prepare(\@argv); - # FIXME: bash completion off "http:" or "https:" when the last - # character is a colon doesn't work properly even if we're - # returning "//$HTTP_HOST/$PATH_INFO/", not sure why, could - # be a bash issue. - map { - $match_cb->(substr($_, length('external.'))); - } grep(/\Aexternal\.$re\Q$cur/, @{$cfg->{-section_order}}); -} - -sub _complete_add_external { # for bash, this relies on "compopt -o nospace" - my ($self, @argv) = @_; - my $cfg = $self->_lei_cfg; - my $match_cb = complete_url_prepare(\@argv); - require URI; - map { - my $u = URI->new(substr($_, length('external.'))); - my ($base) = ($u->path =~ m!((?:/?.*)?/)[^/]+/?\z!); - $u->path($base); - $match_cb->($u->as_string); - } grep(m!\Aexternal\.https?://!, @{$cfg->{-section_order}}); + wantarray ? ($pfx, $cur, $match_cb) : $match_cb; } 1; |