From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 31E39211B4; Wed, 9 Jan 2019 11:43:27 +0000 (UTC) From: Eric Wong To: Konstantin Ryabitsev Cc: meta@public-inbox.org Subject: [RFC 1/2] config: inbox name checking matches git.git more closely Date: Wed, 9 Jan 2019 11:43:26 +0000 Message-Id: <20190109114327.1901-2-e@80x24.org> In-Reply-To: <20190109114327.1901-1-e@80x24.org> References: <20190109114327.1901-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Actually, it turns out git.git/remote.c::valid_remote_nick rules alone are insufficient. More checking is performed as part of the refname in the git.git/refs.c::check_refname_component I also considered rejecting URL-unfriendly inbox names entirely, but realized some users may intentionally configure names not handled by our WWW endpoint for archives they don't want accessible over HTTP. --- lib/PublicInbox/Config.pm | 20 ++++++++++++++++++-- lib/PublicInbox/WWW.pm | 4 +++- t/config.t | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index a2b721d..bea2617 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -152,6 +152,23 @@ sub git_config_dump { \%rv; } +sub valid_inbox_name ($) { + my ($name) = @_; + + # Similar rules found in git.git/remote.c::valid_remote_nick + # and git.git/refs.c::check_refname_component + # We don't reject /\.lock\z/, however, since we don't lock refs + if ($name eq '' || $name =~ /\@\{/ || + $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! || + $name =~ /\A\./ || $name =~ /\.\z/) { + return 0; + } + + # Note: we allow URL-unfriendly characters; users may configure + # non-HTTP-accessible inboxes + 1; +} + sub _fill { my ($self, $pfx) = @_; my $rv = {}; @@ -185,8 +202,7 @@ sub _fill { my $name = $pfx; $name =~ s/\Apublicinbox\.//; - # same rules as git.git/remote.c::valid_remote_nick - if ($name eq '' || $name =~ m!/! || $name eq '.' || $name eq '..') { + if (!valid_inbox_name($name)) { warn "invalid inbox name: '$name'\n"; return; } diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index c1c3926..3562e46 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -19,7 +19,9 @@ use URI::Escape qw(uri_unescape); use PublicInbox::MID qw(mid_escape); require PublicInbox::Git; use PublicInbox::GitHTTPBackend; -our $INBOX_RE = qr!\A/([\w\.\-]+)!; + +# TODO: consider a routing tree now that we have more endpoints: +our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; diff --git a/t/config.t b/t/config.t index 6a6b98c..5f0a95b 100644 --- a/t/config.t +++ b/t/config.t @@ -114,4 +114,40 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); }, 'known addresses populated'); } +my @invalid = ( + # git rejects this because it locks refnames, but we don't have + # this problem with inbox names: + # 'inbox.lock', + + # git rejects these: + '', '..', '.', 'stash@{9}', 'inbox.', '^caret', '~tilde', + '*asterisk', 's p a c e s', ' leading-space', 'trailing-space ', + 'question?', 'colon:', '[square-brace]', "\fformfeed", + "\0zero", "\bbackspace", + +); + +require Data::Dumper; +for my $s (@invalid) { + my $d = Data::Dumper->new([$s])->Terse(1)->Indent(0)->Dump; + ok(!PublicInbox::Config::valid_inbox_name($s), "$d name rejected"); +} + +# obviously-valid examples +my @valid = qw(a a@example a@example.com); + +# Rejecting more was considered, but then it dawned on me that +# people may intentionally use inbox names which are not URL-friendly +# to prevent the PSGI interface from displaying them... +# URL-unfriendly +# '<', '>', '%', '#', '?', '&', '(', ')', + +# maybe these aren't so bad, they're common in Message-IDs, even: +# '!', '$', '=', '+' +push @valid, qw[bang! ca$h less< more> 1% (parens) &more eql= +plus], '#hash'; +for my $s (@valid) { + my $d = Data::Dumper->new([$s])->Terse(1)->Indent(0)->Dump; + ok(PublicInbox::Config::valid_inbox_name($s), "$d name accepted"); +} + done_testing(); -- EW