about summary refs log tree commit homepage
path: root/lib/PublicInbox/Config.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/Config.pm')
-rw-r--r--lib/PublicInbox/Config.pm298
1 files changed, 224 insertions, 74 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index a31b5b74..d6300610 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -10,26 +10,28 @@
 package PublicInbox::Config;
 use strict;
 use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(glob2re rel2abs_collapsed);
 use PublicInbox::Inbox;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spawn qw(popen_rd run_qx);
 our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup
+our $DEDUPE; # set to {} to dedupe or clear cache
 
 sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
 
 # returns key-value pairs of config directives in a hash
 # if keys may be multi-value, the value is an array ref containing all values
 sub new {
-        my ($class, $file, $errfh) = @_;
+        my ($class, $file, $lei) = @_;
         $file //= default_file();
-        my $self;
-        if (ref($file) eq 'SCALAR') { # used by some tests
-                open my $fh, '<', $file or die;  # PerlIO::scalar
-                $self = config_fh_parse($fh, "\n", '=');
-                bless $self, $class;
-        } else {
-                $self = git_config_dump($class, $file, $errfh);
-                $self->{'-f'} = $file;
-        }
+        my ($self, $set_dedupe);
+        if (-f $file && $DEDUPE) {
+                $file = rel2abs_collapsed($file);
+                $self = $DEDUPE->{$file} and return $self;
+                $set_dedupe = 1;
+        }
+        $self = git_config_dump($class, $file, $lei);
+        $self->{-f} = $file;
         # caches
         $self->{-by_addr} = {};
         $self->{-by_list_id} = {};
@@ -38,8 +40,7 @@ sub new {
         $self->{-by_eidx_key} = {};
         $self->{-no_obfuscate} = {};
         $self->{-limiters} = {};
-        $self->{-code_repos} = {}; # nick => PublicInbox::Git object
-        $self->{-cgitrc_unparsed} = $self->{'publicinbox.cgitrc'};
+        $self->{-coderepos} = {}; # nick => PublicInbox::Git object
 
         if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
                 $no = _array($no);
@@ -62,7 +63,7 @@ sub new {
         if (my $css = delete $self->{'publicinbox.css'}) {
                 $self->{css} = _array($css);
         }
-
+        $DEDUPE->{$file} = $self if $set_dedupe;
         $self;
 }
 
@@ -123,9 +124,9 @@ sub lookup_newsgroup {
 sub limiter {
         my ($self, $name) = @_;
         $self->{-limiters}->{$name} //= do {
-                require PublicInbox::Qspawn;
+                require PublicInbox::Limiter;
                 my $max = $self->{"publicinboxlimiter.$name.max"} || 1;
-                my $limiter = PublicInbox::Qspawn::Limiter->new($max);
+                my $limiter = PublicInbox::Limiter->new($max);
                 $limiter->setup_rlimit($name, $self);
                 $limiter;
         };
@@ -143,8 +144,11 @@ sub config_fh_parse ($$$) {
         local $/ = $rs;
         while (defined($line = <$fh>)) { # perf critical with giant configs
                 $i = index($line, $fs);
+                # $i may be -1 if $fs not found and it's a key-only entry
+                # (meaning boolean true).  Either way the -1 will drop the
+                # $rs either from $k or $v.
                 $k = substr($line, 0, $i);
-                $v = substr($line, $i + 1, -1); # chop off $fs
+                $v = $i >= 0 ? substr($line, $i + 1, -1) : 1;
                 $section = substr($k, 0, rindex($k, '.'));
                 $seen{$section} //= push(@section_order, $section);
 
@@ -163,13 +167,34 @@ sub config_fh_parse ($$$) {
         \%rv;
 }
 
+sub tmp_cmd_opt ($$) {
+        my ($env, $opt) = @_;
+        # quiet global and system gitconfig if supported by installed git,
+        # but normally harmless if too noisy (NOGLOBAL no longer exists)
+        $env->{GIT_CONFIG_NOSYSTEM} = 1;
+        $env->{GIT_CONFIG_GLOBAL} = '/dev/null'; # git v2.32+
+        $opt->{-C} = '/'; # avoid $worktree/.git/config on MOST systems :P
+}
+
 sub git_config_dump {
-        my ($class, $file, $errfh) = @_;
-        return bless {}, $class unless -e $file;
-        my $cmd = [ qw(git config -z -l --includes), "--file=$file" ];
-        my $fh = popen_rd($cmd, undef, { 2 => $errfh // 2 });
+        my ($class, $file, $lei) = @_;
+        my @opt_c = map { ('-c', $_) } @{$lei->{opt}->{c} // []};
+        $file = undef if !-e $file;
+        # XXX should we set {-f} if !-e $file?
+        return bless {}, $class if (!@opt_c && !defined($file));
+        my %env;
+        my $opt = { 2 => $lei->{2} // 2 };
+        if (@opt_c) {
+                unshift(@opt_c, '-c', "include.path=$file") if defined($file);
+                tmp_cmd_opt(\%env, $opt);
+        }
+        my @cmd = ('git', @opt_c, qw(config -z -l --includes));
+        push(@cmd, '-f', $file) if !@opt_c && defined($file);
+        my $fh = popen_rd(\@cmd, \%env, $opt);
         my $rv = config_fh_parse($fh, "\0", "\n");
-        close $fh or die "@$cmd failed: \$?=$?\n";
+        $fh->close or die "@cmd failed: \$?=$?\n";
+        $rv->{-opt_c} = \@opt_c if @opt_c; # for ->urlmatch
+        $rv->{-f} = $file;
         bless $rv, $class;
 }
 
@@ -220,7 +245,6 @@ sub cgit_repo_merge ($$$) {
                         $rel =~ s!/?\.git\z!!;
         }
         $self->{"coderepo.$rel.dir"} //= $path;
-        $self->{"coderepo.$rel.cgiturl"} //= _array($rel);
 }
 
 sub is_git_dir ($) {
@@ -256,10 +280,11 @@ sub scan_tree_coderepo ($$) {
         scan_path_coderepo($self, $path, $path);
 }
 
-sub scan_projects_coderepo ($$$) {
-        my ($self, $list, $path) = @_;
-        open my $fh, '<', $list or do {
-                warn "failed to open cgit projectlist=$list: $!\n";
+sub scan_projects_coderepo ($$) {
+        my ($self, $path) = @_;
+        my $l = $self->{-cgit_project_list} // die 'BUG: no cgit_project_list';
+        open my $fh, '<', $l or do {
+                warn "failed to open cgit project-list=$l: $!\n";
                 return;
         };
         while (<$fh>) {
@@ -268,8 +293,20 @@ sub scan_projects_coderepo ($$$) {
         }
 }
 
+sub apply_cgit_scan_path {
+        my ($self, @paths) = @_;
+        @paths or @paths = @{$self->{-cgit_scan_path}};
+        if (defined($self->{-cgit_project_list})) {
+                for my $p (@paths) { scan_projects_coderepo($self, $p) }
+        } else {
+                for my $p (@paths) { scan_tree_coderepo($self, $p) }
+        }
+}
+
 sub parse_cgitrc {
         my ($self, $cgitrc, $nesting) = @_;
+        $cgitrc //= $self->{'publicinbox.cgitrc'} //
+                        $ENV{CGIT_CONFIG} // return;
         if ($nesting == 0) {
                 # defaults:
                 my %s = map { $_ => 1 } qw(/cgit.css /cgit.png
@@ -309,34 +346,41 @@ sub parse_cgitrc {
                         my ($k, $v) = ($1, $2);
                         $k =~ tr/-/_/;
                         $self->{"-cgit_$k"} = $v;
+                        delete $self->{-cgit_scan_path} if $k eq 'project_list';
                 } elsif (m!\Ascan-path=(.+)\z!) {
-                        if (defined(my $list = $self->{-cgit_project_list})) {
-                                scan_projects_coderepo($self, $list, $1);
-                        } else {
-                                scan_tree_coderepo($self, $1);
-                        }
+                        # this depends on being after project-list in the
+                        # config file, just like cgit.c
+                        push @{$self->{-cgit_scan_path}}, $1;
+                        apply_cgit_scan_path($self, $1);
                 } elsif (m!\A(?:css|favicon|logo|repo\.logo)=(/.+)\z!) {
                         # absolute paths for static files via PublicInbox::Cgit
                         $self->{-cgit_static}->{$1} = 1;
+                } elsif (s!\Asnapshots=\s*!!) {
+                        $self->{'coderepo.snapshots'} = $_;
                 }
         }
         cgit_repo_merge($self, $repo->{dir}, $repo) if $repo;
 }
 
+sub valid_dir ($$) {
+        my $dir = get_1($_[0], $_[1]) // return;
+        index($dir, "\n") < 0 ? $dir : do {
+                warn "E: `$_[1]=$dir' must not contain `\\n'\n";
+                undef;
+        }
+}
+
 # parse a code repo, only git is supported at the moment
-sub fill_code_repo {
+sub fill_coderepo {
         my ($self, $nick) = @_;
         my $pfx = "coderepo.$nick";
-        my $dir = $self->{"$pfx.dir"} // do { # aka "GIT_DIR"
-                warn "$pfx.dir unset\n";
-                return;
-        };
-        my $git = PublicInbox::Git->new($dir);
+        my $git = PublicInbox::Git->new(valid_dir($self, "$pfx.dir") // return);
         if (defined(my $cgits = $self->{"$pfx.cgiturl"})) {
                 $git->{cgit_url} = $cgits = _array($cgits);
                 $self->{"$pfx.cgiturl"} = $cgits;
         }
-
+        my %dedupe = ($nick => undef);
+        ($git->{nick}) = keys %dedupe;
         $git;
 }
 
@@ -361,7 +405,7 @@ sub git_bool {
 # is sufficient and doesn't leave "/.." or "/../"
 sub rel2abs_collapsed {
         require File::Spec;
-        my $p = File::Spec->rel2abs($_[-1]);
+        my $p = File::Spec->rel2abs(@_);
         return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0;
         require Cwd;
         Cwd::abs_path($p);
@@ -377,11 +421,12 @@ sub get_1 {
 
 sub repo_objs {
         my ($self, $ibxish) = @_;
-        my $ibx_code_repos = $ibxish->{coderepo} // return;
         $ibxish->{-repo_objs} // do {
-                my $code_repos = $self->{-code_repos};
+                my $ibx_coderepos = $ibxish->{coderepo} // return;
+                parse_cgitrc($self, undef, 0);
+                my $coderepos = $self->{-coderepos};
                 my @repo_objs;
-                for my $nick (@$ibx_code_repos) {
+                for my $nick (@$ibx_coderepos) {
                         my @parts = split(m!/!, $nick);
                         for (@parts) {
                                 @parts = () unless valid_foo_name($_);
@@ -390,12 +435,16 @@ sub repo_objs {
                                 warn "invalid coderepo name: `$nick'\n";
                                 next;
                         }
-                        my $repo = $code_repos->{$nick} //=
-                                                fill_code_repo($self, $nick);
-                        push @repo_objs, $repo if $repo;
+                        my $repo = $coderepos->{$nick} //=
+                                                fill_coderepo($self, $nick);
+                        $repo ? push(@repo_objs, $repo) :
+                                warn("coderepo.$nick.dir unset\n");
                 }
                 if (scalar @repo_objs) {
-                        $ibxish ->{-repo_objs} = \@repo_objs;
+                        for (@repo_objs) {
+                                push @{$_->{ibx_names}}, $ibxish->{name};
+                        }
+                        $ibxish->{-repo_objs} = \@repo_objs;
                 } else {
                         delete $ibxish->{coderepo};
                 }
@@ -410,18 +459,15 @@ sub _fill_ibx {
                 my $v = $self->{"$pfx.$k"};
                 $ibx->{$k} = $v if defined $v;
         }
-        for my $k (qw(filter inboxdir newsgroup replyto httpbackendmax feedmax
+        for my $k (qw(filter newsgroup replyto httpbackendmax feedmax
                         indexlevel indexsequentialshard boost)) {
                 my $v = get_1($self, "$pfx.$k") // next;
                 $ibx->{$k} = $v;
         }
 
         # "mainrepo" is backwards compatibility:
-        my $dir = $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"} // return;
-        if (index($dir, "\n") >= 0) {
-                warn "E: `$dir' must not contain `\\n'\n";
-                return;
-        }
+        my $dir = $ibx->{inboxdir} = valid_dir($self, "$pfx.inboxdir") //
+                                valid_dir($self, "$pfx.mainrepo") // return;
         for my $k (qw(obfuscate)) {
                 my $v = $self->{"$pfx.$k"} // next;
                 if (defined(my $bval = git_bool($v))) {
@@ -441,7 +487,8 @@ sub _fill_ibx {
         }
 
         return unless valid_foo_name($name, 'publicinbox');
-        $ibx->{name} = $name;
+        my %dedupe = ($name => undef);
+        ($ibx->{name}) = keys %dedupe; # used as a key everywhere
         $ibx->{-pi_cfg} = $self;
         $ibx = PublicInbox::Inbox->new($ibx);
         foreach (@{$ibx->{address}}) {
@@ -469,9 +516,16 @@ sub _fill_ibx {
                         delete $ibx->{newsgroup};
                         warn "newsgroup name invalid: `$ngname'\n";
                 } else {
+                        my $lc = $ibx->{newsgroup} = lc $ngname;
+                        warn <<EOM if $lc ne $ngname;
+W: newsgroup=`$ngname' lowercased to `$lc'
+EOM
                         # PublicInbox::NNTPD does stricter ->nntp_usable
                         # checks, keep this lean for startup speed
-                        $self->{-by_newsgroup}->{$ngname} = $ibx;
+                        my $cur = $self->{-by_newsgroup}->{$lc} //= $ibx;
+                        warn <<EOM if $cur != $ibx;
+W: newsgroup=`$lc' is used by both `$cur->{name}' and `$ibx->{name}'
+EOM
                 }
         }
         unless (defined $ibx->{newsgroup}) { # for ->eidx_key
@@ -491,19 +545,18 @@ sub _fill_ibx {
                 require PublicInbox::Isearch;
                 $ibx->{isrch} = PublicInbox::Isearch->new($ibx, $es);
         }
-        $self->{-by_eidx_key}->{$ibx->eidx_key} = $ibx;
+        my $cur = $self->{-by_eidx_key}->{my $ekey = $ibx->eidx_key} //= $ibx;
+        $cur == $ibx or warn
+                "W: `$ekey' used by both `$cur->{name}' and `$ibx->{name}'\n";
+        $ibx;
 }
 
 sub _fill_ei ($$) {
         my ($self, $name) = @_;
         eval { require PublicInbox::ExtSearch } or return;
         my $pfx = "extindex.$name";
-        my $d = $self->{"$pfx.topdir"} // return;
+        my $d = valid_dir($self, "$pfx.topdir") // return;
         -d $d or return;
-        if (index($d, "\n") >= 0) {
-                warn "E: `$d' must not contain `\\n'\n";
-                return;
-        }
         my $es = PublicInbox::ExtSearch->new($d);
         for my $k (qw(indexlevel indexsequentialshard)) {
                 my $v = get_1($self, "$pfx.$k") // next;
@@ -518,23 +571,76 @@ sub _fill_ei ($$) {
         $es;
 }
 
+sub _fill_csrch ($$) {
+        my ($self, $name) = @_; # "" is a valid name for cindex
+        return if $name ne '' && !valid_foo_name($name, 'cindex');
+        eval { require PublicInbox::CodeSearch } or return;
+        my $pfx = "cindex.$name";
+        my $d = valid_dir($self, "$pfx.topdir") // return;
+        -d $d or return;
+        my $csrch = PublicInbox::CodeSearch->new($d, $self);
+        for my $k (qw(localprefix)) {
+                my $v = $self->{"$pfx.$k"} // next;
+                $csrch->{$k} = _array($v);
+        }
+        $csrch->{name} = $name;
+        $csrch;
+}
+
+sub lookup_cindex ($$) {
+        my ($self, $name) = @_;
+        $self->{-csrch_by_name}->{$name} //= _fill_csrch($self, $name);
+}
+
+sub each_cindex {
+        my ($self, $cb, @arg) = @_;
+        my @csrch = map {
+                lookup_cindex($self, substr($_, length('cindex.'))) // ()
+        } grep(m!\Acindex\.[^\./]*\z!, @{$self->{-section_order}});
+        if (ref($cb) eq 'CODE') {
+                $cb->($_, @arg) for @csrch;
+        } else { # string function
+                $_->$cb(@arg) for @csrch;
+        }
+}
+
+sub config_cmd {
+        my ($self, $env, $opt) = @_;
+        my $f = $self->{-f} // default_file();
+        my @opt_c = @{$self->{-opt_c} // []};
+        my @cmd = ('git', @opt_c, 'config');
+        @opt_c ? tmp_cmd_opt($env, $opt) : push(@cmd, '-f', $f);
+        \@cmd;
+}
+
 sub urlmatch {
-        my ($self, $key, $url) = @_;
+        my $self = shift;
+        my @bool = $_[0] eq '--bool' ? (shift) : ();
+        my ($key, $url, $try_git) = @_;
         state $urlmatch_broken; # requires git 1.8.5
         return if $urlmatch_broken;
-        my $file = $self->{'-f'} // default_file();
-        my $cmd = [qw/git config -z --includes --get-urlmatch/,
-                "--file=$file", $key, $url ];
-        my $fh = popen_rd($cmd);
-        local $/ = "\0";
-        my $val = <$fh>;
-        if (close($fh)) {
-                chomp($val);
-                $val;
-        } else {
-                $urlmatch_broken = 1 if (($? >> 8) != 1);
-                undef;
+        my (%env, %opt);
+        my $cmd = $self->config_cmd(\%env, \%opt);
+        push @$cmd, @bool, qw(--includes -z --get-urlmatch), $key, $url;
+        my $val = run_qx($cmd, \%env, \%opt);
+        if ($?) {
+                undef $val;
+                if (@bool && ($? >> 8) == 128) { # not boolean
+                } elsif (($? >> 8) != 1) {
+                        $urlmatch_broken = 1;
+                } elsif ($try_git) { # n.b. this takes cwd into account
+                        $val = run_qx([qw(git config), @bool,
+                                        qw(-z --get-urlmatch), $key, $url]);
+                        undef $val if $?;
+                }
+        }
+        $? = 0; # don't influence lei exit status
+        if (defined($val)) {
+                local $/ = "\0";
+                chomp $val;
+                $val = git_bool($val) if @bool;
         }
+        $val;
 }
 
 sub json {
@@ -558,4 +664,48 @@ sub squote_maybe ($) {
         $val;
 }
 
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+                '/**' => '/.*', '**/' => '.*/', '/**/' => '(?:/.*?/|/)',
+                '[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re ($) {
+        my ($re) = @_;
+        my $p = '';
+        my $in_bracket = 0;
+        my $qm = 0;
+        my $schema_host_port = '';
+
+        # don't glob URL-looking things that look like IPv6
+        if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
+                $schema_host_port = quotemeta $1; # "http://[::1]:1234"
+        }
+        my $changes = ($re =~ s!(/\*\*/|\A\*\*/|/\*\*\z|.)!
+                $re_map{$p eq '\\' ? '' : do {
+                        if ($1 eq '[') { ++$in_bracket }
+                        elsif ($1 eq ']') { --$in_bracket }
+                        elsif ($1 eq ',') { ++$qm } # no change
+                        $p = $1;
+                }} // do {
+                        $p = $1;
+                        ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+                }!sge);
+        # bashism (also supported by curl): {a,b,c} => (a|b|c)
+        $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+                        (my $in_braces = $2) =~ tr!,!|!;
+                        $1."($in_braces)";
+                        /sge);
+        ($changes - $qm) ? $schema_host_port.$re : undef;
+}
+
+sub get_coderepo {
+        my ($self, $nick) = @_;
+        $self->{-coderepos}->{$nick} // do {
+                defined($self->{-cgit_scan_path}) ? do {
+                        apply_cgit_scan_path($self);
+                        my $cr = fill_coderepo($self, $nick);
+                        $cr ? ($self->{-coderepos}->{$nick} = $cr) : undef;
+                } : undef;
+        };
+}
+
 1;