user/dev discussion of public-inbox itself
 help / color / Atom feed
* [PATCH] git: various minor speedups
@ 2020-04-28  8:48 Eric Wong
  2020-04-29 20:33 ` Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2020-04-28  8:48 UTC (permalink / raw)
  To: meta

While testing performance improvements elsewhere, I noticed some
micro-optimizations could give a small ~2-3% speedup in my test
using the git async API to parse a large inbox.

The `read' perlfunc already has read-in-full behavior (unless
git is killed unexpectedly), so there's no point in using a
loop.  SearchIdxShard in the parallel v2 indexing code path
never looped on `read', either.

Furthermore, we can avoid method dispatch overhead on ->getline
and ->print by using `readline' and `print' as ops which can be
resolved during the Perl compilation phase.

Finally, avoid passing the IO handle around as a parameter:
using a local variable to avoid hash lookups has its own
costs in stack usage and refcount bumping.

Best of all, there's less code :>
---
 lib/PublicInbox/Git.pm | 48 +++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 8410b2fc..f1911534 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -125,59 +125,49 @@ sub _bidi_pipe {
 	$self->{$in} = $in_r;
 }
 
-sub read_cat_in_full ($$$) {
-	my ($self, $in, $left) = @_;
-	my $offset = 0;
-	my $buf = '';
-	while ($left > 0) {
-		my $r = read($in, $buf, $left, $offset);
-		defined($r) or fail($self, "read failed: $!");
-		$r == 0 and fail($self, 'exited unexpectedly');
-		$left -= $r;
-		$offset += $r;
-	}
-	my $r = read($in, my $lf, 1);
-	defined($r) or fail($self, "read failed: $!");
-	fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n");
+sub read_cat_in_full ($$) {
+	my ($self, $left) = @_;
+	++$left; # for final "\n" added by git
+	my $r = read($self->{in}, my $buf, $left) == $left or
+		fail($self, 'short read');
+	chop($buf) eq "\n" or fail($self, 'newline missing after blob');
 	\$buf;
 }
 
-sub _cat_async_step ($$$) {
-	my ($self, $inflight, $in) = @_;
+sub _cat_async_step ($$) {
+	my ($self, $inflight) = @_;
 	my $pair = shift @$inflight or die 'BUG: inflight empty';
 	my ($cb, $arg) = @$pair;
 	local $/ = "\n";
-	my $head = $in->getline;
+	my $head = readline($self->{in});
 	$head =~ / missing$/ and return
 		eval { $cb->(undef, undef, undef, undef, $arg) };
 
 	$head =~ /^([0-9a-f]{40}) (\S+) ([0-9]+)$/ or
 		fail($self, "Unexpected result from async git cat-file: $head");
 	my ($oid_hex, $type, $size) = ($1, $2, $3 + 0);
-	my $bref = read_cat_in_full($self, $in, $size);
+	my $bref = read_cat_in_full($self, $size);
 	eval { $cb->($bref, $oid_hex, $type, $size, $arg) };
 }
 
 sub cat_async_wait ($) {
 	my ($self) = @_;
 	my $inflight = delete $self->{inflight} or return;
-	my $in = $self->{in};
 	while (scalar(@$inflight)) {
-		_cat_async_step($self, $inflight, $in);
+		_cat_async_step($self, $inflight);
 	}
 }
 
 sub cat_file {
 	my ($self, $obj, $ref) = @_;
-	my ($retried, $in, $head);
+	my ($retried, $head);
 	cat_async_wait($self);
 again:
 	batch_prepare($self);
-	$self->{out}->print($obj, "\n") or fail($self, "write error: $!");
+	print { $self->{out} } $obj, "\n" or fail($self, "write error: $!");
 
-	$in = $self->{in};
 	local $/ = "\n";
-	$head = $in->getline;
+	$head = readline($self->{in});
 	if ($head =~ / missing$/) {
 		if (!$retried && alternates_changed($self)) {
 			$retried = 1;
@@ -191,7 +181,7 @@ again:
 
 	my $size = $1;
 	$$ref = $size if $ref;
-	read_cat_in_full($self, $in, $size);
+	read_cat_in_full($self, $size);
 }
 
 sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
@@ -199,9 +189,9 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
 sub check {
 	my ($self, $obj) = @_;
 	_bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
-	$self->{out_c}->print($obj, "\n") or fail($self, "write error: $!");
+	print { $self->{out_c} } $obj, "\n" or fail($self, "write error: $!");
 	local $/ = "\n";
-	chomp(my $line = $self->{in_c}->getline);
+	chomp(my $line = readline($self->{in_c}));
 	my ($hex, $type, $size) = split(' ', $line);
 
 	# Future versions of git.git may show 'ambiguous', but for now,
@@ -320,10 +310,10 @@ sub cat_async ($$$;$) {
 	my ($self, $oid, $cb, $arg) = @_;
 	my $inflight = $self->{inflight} or die 'BUG: not in async';
 	if (scalar(@$inflight) >= MAX_INFLIGHT) {
-		_cat_async_step($self, $inflight, $self->{in});
+		_cat_async_step($self, $inflight);
 	}
 
-	$self->{out}->print($oid, "\n") or fail($self, "write error: $!");
+	print { $self->{out} } $oid, "\n" or fail($self, "write error: $!");
 	push(@$inflight, [ $cb, $arg ]);
 }
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] git: various minor speedups
  2020-04-28  8:48 [PATCH] git: various minor speedups Eric Wong
@ 2020-04-29 20:33 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2020-04-29 20:33 UTC (permalink / raw)
  To: meta

Eric Wong <e@yhbt.net> wrote:
> Best off all, there's less code :>

And even less...

> --- a/lib/PublicInbox/Git.pm
> +++ b/lib/PublicInbox/Git.pm
> @@ -125,59 +125,49 @@ sub _bidi_pipe {
>  	$self->{$in} = $in_r;
>  }
>  
> -sub read_cat_in_full ($$$) {
> -	my ($self, $in, $left) = @_;
> -	my $offset = 0;
> -	my $buf = '';
> -	while ($left > 0) {
> -		my $r = read($in, $buf, $left, $offset);
> -		defined($r) or fail($self, "read failed: $!");
> -		$r == 0 and fail($self, 'exited unexpectedly');
> -		$left -= $r;
> -		$offset += $r;
> -	}
> -	my $r = read($in, my $lf, 1);
> -	defined($r) or fail($self, "read failed: $!");
> -	fail($self, 'newline missing after blob') if ($r != 1 || $lf ne "\n");
> +sub read_cat_in_full ($$) {
> +	my ($self, $left) = @_;
> +	++$left; # for final "\n" added by git
> +	my $r = read($self->{in}, my $buf, $left) == $left or
> +		fail($self, 'short read');
> +	chop($buf) eq "\n" or fail($self, 'newline missing after blob');
>  	\$buf;
>  }

No need to save `$r', and `$len' makes a better name for
a single read() than `$left' with the new code.  Will squash
this in:

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index f1911534..057135ef 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -126,10 +126,9 @@ sub _bidi_pipe {
 }
 
 sub read_cat_in_full ($$) {
-	my ($self, $left) = @_;
-	++$left; # for final "\n" added by git
-	my $r = read($self->{in}, my $buf, $left) == $left or
-		fail($self, 'short read');
+	my ($self, $len) = @_;
+	++$len; # for final "\n" added by git
+	read($self->{in}, my $buf, $len) == $len or fail($self, 'short read');
 	chop($buf) eq "\n" or fail($self, 'newline missing after blob');
 	\$buf;
 }

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-28  8:48 [PATCH] git: various minor speedups Eric Wong
2020-04-29 20:33 ` Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git