From adf443322d8747bbc5b0b1d6e7bf819f70244456 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 15 Jan 2019 02:42:09 +0000
Subject: git_unquote: perform modifications in-place

This function doesn't have a lot of callers at the moment so none
of them are affected by this change. But the plan is to use this
in our WWW code for things, so do it now before we call it in
more places.

Results from a Thinkpad X200 with a Core2Duo P8600 @ 2.4GHz:

Benchmark: timing 10 iterations of cp, ip...
cp: 12.868 wallclock secs (12.86 usr + 0.00 sys = 12.86 CPU) @ 0.78/s (n=10)
ip: 10.9137 wallclock secs (10.91 usr + 0.00 sys = 10.91 CPU) @ 0.92/s (n=10)

Note: I mainly care about unquoted performance because
that's the common case for the target audience of public-inbox.

Script used to get benchmark results against the Linux source tree:
==> bench_unquote.perl <==
use strict;
use warnings;
use Benchmark ':hireswallclock';
my $nr = 50;
my %GIT_ESC = (
	a => "\a",
	b => "\b",
	f => "\f",
	n => "\n",
	r => "\r",
	t => "\t",
	v => "\013",
);

sub git_unquote_ip ($) {
	return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
	$_[0] = $1;
	$_[0] =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
	$_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
	$_[0];
}

sub git_unquote_cp ($) {
	my ($s) = @_;
	return $s unless ($s =~ /\A"(.*)"\z/);
	$s = $1;
	$s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
	$s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
	$s;
}

chomp(my @files = `git -C ~/linux ls-tree --name-only -r v4.19.13`);

timethese(10, {
	cp => sub { for (0..$nr) { git_unquote_cp($_) for @files } },
	ip => sub { for (0..$nr) { git_unquote_ip($_) for @files } },
});
---
 lib/PublicInbox/Git.pm | 11 +++++------
 t/git.t                |  4 ++++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 8d3f87d5..4601f259 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -27,12 +27,11 @@ my %GIT_ESC = (
 
 # unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
 sub git_unquote ($) {
-	my ($s) = @_;
-	return $s unless ($s =~ /\A"(.*)"\z/);
-	$s = $1;
-	$s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
-	$s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
-	$s;
+	return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
+	$_[0] = $1;
+	$_[0] =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
+	$_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+	$_[0];
 }
 
 sub new {
diff --git a/t/git.t b/t/git.t
index 6538b6ca..50ec4fbf 100644
--- a/t/git.t
+++ b/t/git.t
@@ -144,4 +144,8 @@ if ('alternates reloaded') {
 	is($$found, $config, 'alternates reloaded');
 }
 
+use_ok 'PublicInbox::Git', qw(git_unquote);
+is("foo\nbar", git_unquote('"foo\\nbar"'.''), 'unquoted newline');
+is("Eléanor", git_unquote('"El\\303\\251anor"'.''), 'unquoted octal');
+
 done_testing();
-- 
cgit v1.2.3-24-ge0c7