about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-01-15 02:42:09 +0000
committerEric Wong <e@80x24.org>2019-01-15 20:57:09 +0000
commitadf443322d8747bbc5b0b1d6e7bf819f70244456 (patch)
treec75db5a5e5c00b0ff3cfb05942872bf590164d5a
parentfe20d568e82cdb3645b42f18f1691d64271aaf7b (diff)
downloadpublic-inbox-adf443322d8747bbc5b0b1d6e7bf819f70244456.tar.gz
git_unquote doesn't have many callers at the moment, and none of
them are affected by having their argument modified in place.
But the plan is to use it more widely in our WWW code, so make
the change now before we call it in more places.

Results from a Thinkpad X200 with a Core2Duo P8600 @ 2.4GHz:

Benchmark: timing 10 iterations of cp, ip...
        cp: 12.868 wallclock secs (12.86 usr +  0.00 sys = 12.86 CPU) @  0.78/s (n=10)
        ip: 10.9137 wallclock secs (10.91 usr +  0.00 sys = 10.91 CPU) @  0.92/s (n=10)

Note: I mainly care about unquoted performance because
that's the common case for the target audience of public-inbox.

Script used to get benchmark results against the Linux source tree:
==> bench_unquote.perl <==
use strict;
use warnings;
use Benchmark ':hireswallclock';
# Upper bound of the repeat loop inside each timed sub below.
my $nr = 50;

# Maps the letter that follows a backslash in a git-quoted pathname to
# the control character it stands for.  Shared by both unquote variants.
my %GIT_ESC = (
	a => "\a",
	b => "\b",
	f => "\f",
	n => "\n",
	r => "\r",
	t => "\t",
	v => "\013",	# vertical tab; Perl strings have no "\v" escape
);

# In-place variant: unquote a pathname as printed by git, storing the
# result back through $_[0] (an alias of the caller's variable) and
# returning it.  Input that is not wrapped in double quotes is returned
# untouched.  Because the argument is written to, callers must pass a
# modifiable scalar rather than a constant literal.
#
# All escapes are decoded in a single left-to-right pass so that an
# escaped backslash is consumed together with its partner: \\n yields a
# backslash followed by "n", not a backslash followed by a newline.
# The three alternatives are, in order: a named control character looked
# up in %GIT_ESC, an octal byte of one to three digits (e.g. \303), and
# a literal backslash or double quote.  Unrecognized escapes are left
# as they are.
sub git_unquote_ip ($) {
	return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
	$_[0] = $1;
	$_[0] =~ s{\\(?:([abfnrtv])|([0-7]{1,3})|([\\"]))}{
		defined($1) ? $GIT_ESC{$1}
		: defined($2) ? chr(oct($2))
		: $3
	}ge;
	$_[0];
}

# Copying variant: unquote a pathname as printed by git and return the
# result, leaving the caller's variable alone.  Input that is not
# wrapped in double quotes is returned unchanged.
#
# All escapes are decoded in a single left-to-right pass so that an
# escaped backslash is consumed together with its partner: \\n yields a
# backslash followed by "n", not a backslash followed by a newline.
# The three alternatives are, in order: a named control character looked
# up in %GIT_ESC, an octal byte of one to three digits (e.g. \303), and
# a literal backslash or double quote.  Unrecognized escapes are left
# as they are.
sub git_unquote_cp ($) {
	my ($s) = @_;
	return $s unless ($s =~ /\A"(.*)"\z/);
	$s = $1;
	$s =~ s{\\(?:([abfnrtv])|([0-7]{1,3})|([\\"]))}{
		defined($1) ? $GIT_ESC{$1}
		: defined($2) ? chr(oct($2))
		: $3
	}ge;
	$s;
}

# Collect every path in the v4.19.13 tree of the local Linux checkout.
# The backticks run through the shell, which is what expands "~".
chomp(my @files = `git -C ~/linux ls-tree --name-only -r v4.19.13`);

# A missing checkout or tag would otherwise benchmark an empty list and
# print timings that mean nothing.
die "git ls-tree failed (\$? = $?)\n" if $?;
die "git ls-tree returned no paths\n" unless @files;

# Each of the 10 timed iterations makes $nr + 1 passes over the whole
# list (0..$nr is inclusive).  Both arms do identical work apart from
# which unquote variant they call.
timethese(10, {
	cp => sub { for (0..$nr) { git_unquote_cp($_) for @files } },
	ip => sub { for (0..$nr) { git_unquote_ip($_) for @files } },
});
-rw-r--r--lib/PublicInbox/Git.pm11
-rw-r--r--t/git.t4
2 files changed, 9 insertions, 6 deletions
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 8d3f87d5..4601f259 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -27,12 +27,11 @@ my %GIT_ESC = (
 
 # unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
 sub git_unquote ($) {
-        my ($s) = @_;
-        return $s unless ($s =~ /\A"(.*)"\z/);
-        $s = $1;
-        $s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
-        $s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
-        $s;
+        return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
+        $_[0] = $1;
+        $_[0] =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
+        $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+        $_[0];
 }
 
 sub new {
diff --git a/t/git.t b/t/git.t
index 6538b6ca..50ec4fbf 100644
--- a/t/git.t
+++ b/t/git.t
@@ -144,4 +144,8 @@ if ('alternates reloaded') {
         is($$found, $config, 'alternates reloaded');
 }
 
+use_ok 'PublicInbox::Git', qw(git_unquote);
+is("foo\nbar", git_unquote('"foo\\nbar"'.''), 'unquoted newline');
+is("Eléanor", git_unquote('"El\\303\\251anor"'.''), 'unquoted octal');
+
 done_testing();