about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiExternal.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-02-10 07:07:44 +0000
committerEric Wong <e@80x24.org>2021-02-10 19:21:32 +0000
commitdd80cd59cf0c90d61257fbfe8176e3312edbb841 (patch)
tree0eca4983372b1c80377663179157b993bb2535e9 /lib/PublicInbox/LeiExternal.pm
parent04b73104416e4734b3a01e47525119cac867065a (diff)
downloadpublic-inbox-dd80cd59cf0c90d61257fbfe8176e3312edbb841.tar.gz
The "ls-external" now accepts the same glob patterns used by
with lei q --{include,only,exclude}.  If no glob is detected, it
will be treated as a literal substring match (like "grep -F").

Inverting matches is also supported ("grep -v").
Diffstat (limited to 'lib/PublicInbox/LeiExternal.pm')
-rw-r--r--lib/PublicInbox/LeiExternal.pm74
1 files changed, 52 insertions, 22 deletions
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b65dc87c..bac15226 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -22,22 +22,16 @@ sub externals_each {
         # highest boost first, but stable for alphabetic tie break
         use sort 'stable';
         my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
-        return @order if !$cb;
-        for my $loc (@order) {
-                $cb->(@arg, $loc, $boost{$loc});
+        if (ref($cb) eq 'CODE') {
+                for my $loc (@order) {
+                        $cb->(@arg, $loc, $boost{$loc});
+                }
+        } elsif (ref($cb) eq 'HASH') {
+                %$cb = %boost;
         }
         @order; # scalar or array
 }
 
-sub lei_ls_external {
-        my ($self, @argv) = @_;
-        my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
-        externals_each($self, sub {
-                my ($loc, $boost_val) = @_;
-                $self->out($loc, $OFS, 'boost=', $boost_val, $ORS);
-        });
-}
-
 sub ext_canonicalize {
         my ($location) = @_;
         if ($location !~ m!\Ahttps?://!) {
@@ -52,28 +46,47 @@ sub ext_canonicalize {
         }
 }
 
-my %patmap = ('*' => '[^/]*?', '?' => '[^/]', '[' => '[', ']' => ']');
-sub glob2pat {
-        my ($glob) = @_;
-        $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
-        $glob;
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+                '[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re {
+        my ($re) = @_;
+        my $p = '';
+        my $in_bracket = 0;
+        my $qm = 0;
+        my $changes = ($re =~ s!(.)!
+                $re_map{$p eq '\\' ? '' : do {
+                        if ($1 eq '[') { ++$in_bracket }
+                        elsif ($1 eq ']') { --$in_bracket }
+                        $p = $1;
+                }} // do {
+                        $p = $1;
+                        ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+                }!sge);
+        # bashism (also supported by curl): {a,b,c} => (a|b|c)
+        $re =~ s/([^\\]*)\\\{([^,]*?,[^\\]*?)\\\}/
+                (my $in_braces = $2) =~ tr!,!|!;
+                $1."($in_braces)";
+                /sge;
+        ($changes - $qm) ? $re : undef;
 }
 
+# get canonicalized externals list matching $loc
+# $is_exclude denotes it's for --exclude
+# otherwise it's for --only/--include is assumed
 sub get_externals {
-        my ($self, $loc, $exclude) = @_;
+        my ($self, $loc, $is_exclude) = @_;
         return (ext_canonicalize($loc)) if -e $loc;
-
         my @m;
         my @cur = externals_each($self);
         my $do_glob = !$self->{opt}->{globoff}; # glob by default
-        if ($do_glob && ($loc =~ /[\*\?]/s || $loc =~ /\[.*\]/s)) {
-                my $re = glob2pat($loc);
+        if ($do_glob && (my $re = glob2re($loc))) {
                 @m = grep(m!$re!, @cur);
                 return @m if scalar(@m);
         } elsif (index($loc, '/') < 0) { # exact basename match:
                 @m = grep(m!/\Q$loc\E/?\z!, @cur);
                 return @m if scalar(@m) == 1;
-        } elsif ($exclude) { # URL, maybe:
+        } elsif ($is_exclude) { # URL, maybe:
                 my $canon = ext_canonicalize($loc);
                 @m = grep(m!\A\Q$canon\E\z!, @cur);
                 return @m if scalar(@m) == 1;
@@ -88,6 +101,23 @@ sub get_externals {
         ();
 }
 
+sub lei_ls_external {
+        my ($self, $filter) = @_;
+        my $do_glob = !$self->{opt}->{globoff}; # glob by default
+        my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+        $filter //= '*';
+        my $re = $do_glob ? glob2re($filter) : undef;
+        $re //= index($filter, '/') < 0 ?
+                        qr!/\Q$filter\E/?\z! : # exact basename match
+                        qr/\Q$filter\E/; # grep -F semantics
+        my @ext = externals_each($self, my $boost = {});
+        @ext = $self->{opt}->{'invert-match'} ? grep(!/$re/, @ext)
+                                        : grep(/$re/, @ext);
+        for my $loc (@ext) {
+                $self->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);
+        }
+}
+
 sub add_external_finish {
         my ($self, $location) = @_;
         my $cfg = $self->_lei_cfg(1);