From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 5797A1F462 for ; Wed, 5 Jun 2019 02:18:48 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 25/24] tighten up digit matches to ASCII for git output Date: Wed, 5 Jun 2019 02:18:48 +0000 Message-Id: <20190605021848.29258-1-e@80x24.org> In-Reply-To: <20190604112748.23598-1-e@80x24.org> References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: While I don't expect git to suddenly start spewing non-ASCII digits in places I'd expect ASCII, this would make things easier for future hackers and reviewers. --- lib/PublicInbox/Git.pm | 4 ++-- lib/PublicInbox/Import.pm | 10 +++++----- script/public-inbox-convert | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 9014e02..68445b3 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -141,7 +141,7 @@ again: } return; } - $head =~ /^[0-9a-f]{40} \S+ (\d+)$/ or + $head =~ /^[0-9a-f]{40} \S+ ([0-9]+)$/ or fail($self, "Unexpected result from git cat-file: $head"); my $size = $1; @@ -319,7 +319,7 @@ sub modified ($) { foreach my $oid (<$fh>) { chomp $oid; my $buf = cat_file($self, $oid) or next; - $$buf =~ /^committer .*?> (\d+) [\+\-]?\d+/sm or next; + $$buf =~ /^committer .*?> ([0-9]+) [\+\-]?[0-9]+/sm or next; my $cmt_time = $1; $modified = $cmt_time if $cmt_time > $modified; } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 81a38fb..2c4bad9 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -106,7 +106,7 @@ sub _cat_blob ($$$) { local $/ = "\n"; my $info = <$r>; defined $info or die "EOF from fast-import / cat-blob: $!"; - $info =~ /\A[a-f0-9]{40} blob (\d+)\n\z/ or return; + $info =~ /\A[a-f0-9]{40} blob ([0-9]+)\n\z/ or return; my $left = $1; my $offset = 0; my $buf = ''; @@ -493,9 +493,9 @@ sub clean_purge_buffer { foreach my $i (0..$#$buf) { my $l = $buf->[$i]; - if ($l =~ /^author .* (\d+ [\+-]?\d+)$/) { + if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) { $buf->[$i] = "author <> $1\n"; - } elsif ($l =~ /^data (\d+)/) { + } elsif ($l =~ /^data ([0-9]+)/) { $buf->[$i++] = "data " . length($cmt_msg) . "\n"; $buf->[$i] = $cmt_msg; last; @@ -525,7 +525,7 @@ sub purge_oids { @buf = (); } push @buf, "commit $tmp\n"; - } elsif (/^data (\d+)/) { + } elsif (/^data ([0-9]+)/) { # only commit message, so $len is small: my $len = $1; # + 1 for trailing "\n" push @buf, $_; @@ -557,7 +557,7 @@ sub purge_oids { @buf = (); } elsif ($_ eq "done\n") { $done = 1; - } elsif (/^mark :(\d+)$/) { + } elsif (/^mark :([0-9]+)$/) { push @buf, $_; $mark = $1; } else { diff --git a/script/public-inbox-convert b/script/public-inbox-convert index bd8fb98..99480c3 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -103,7 +103,7 @@ while (<$rd>) { $state = 'blob'; } elsif (/^commit /) { $state = 'commit'; - } elsif (/^data (\d+)/) { + } elsif (/^data ([0-9]+)/) { my $len = $1; $w->print($_) or $im->wfail; while ($len) { @@ -114,7 +114,7 @@ while (<$rd>) { } next; } elsif ($state eq 'commit') { - if (m{^M 100644 :(\d+) (${h}{2}/${h}{38})}o) { + if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) { my ($mark, $path) = ($1, $2); $D{$path} = $mark; if ($last && $last ne 'm') { @@ -134,7 +134,7 @@ while (<$rd>) { $last = 'd'; next; } - if (m{^from (:\d+)}) { + if (m{^from (:[0-9]+)}) { $prev = $from; $from = $1; # no next -- EW