From bec2d26785b20793de0169b30d0af67fd20fcd4e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 13 Jan 2017 02:13:18 +0000 Subject: repobrowse: port git log view to qspawn streaming interface This will prevent too many processes from being spawned at once while also allowing us to respond to backpressure from slow clients. --- lib/PublicInbox/RepobrowseGitLog.pm | 227 +++++++++++++++++++++++++----------- 1 file changed, 156 insertions(+), 71 deletions(-) diff --git a/lib/PublicInbox/RepobrowseGitLog.pm b/lib/PublicInbox/RepobrowseGitLog.pm index 44b405bc..e62486ba 100644 --- a/lib/PublicInbox/RepobrowseGitLog.pm +++ b/lib/PublicInbox/RepobrowseGitLog.pm @@ -8,95 +8,180 @@ use warnings; use PublicInbox::Hval qw(utf8_html); use base qw(PublicInbox::RepobrowseBase); use PublicInbox::RepobrowseGit qw(git_dec_links git_commit_title); +use PublicInbox::Qspawn; # cannot rely on --date=format-local:... yet, it is too new (September 2015) my $LOG_FMT = '--pretty=tformat:'. join('%x00', qw(%h %p %s D%D %ai a%an b%b), '', ''); -sub call_git_log { - my ($self, $req) = @_; - my $repo_info = $req->{repo_info}; - my $max = $repo_info->{max_commit_count} || 50; - $max = int($max); - $max = 50 if $max == 0; - - my $q = PublicInbox::RepobrowseGitQuery->new($req->{env}); - my $h = $q->{h}; - $h eq '' and $h = 'HEAD'; - - my $git = $repo_info->{git}; - my $log = $git->popen(qw(log --no-notes --no-color --abbrev-commit), - $git->abbrev, $LOG_FMT, "-$max", $h, '--'); - sub { - my ($res) = @_; # Plack callback - my $fh = $res->([200, ['Content-Type'=>'text/html']]); - my $title = "log: $repo_info->{repo} ".utf8_html("($h)"); - $fh->write($self->html_start($req, $title)); - git_log_stream($req, $q, $log, $fh, $git); - $fh->close; +sub parent_links { + if (@_ == 1) { # typical, single-parent commit + qq( / parent $_[0]); + } elsif (@_ > 0) { # merge commit + ' / parents ' . 
+ join(' ', map { qq($_) } @_); + } else { + ''; # root commit } } -sub git_log_stream { - my ($req, $q, $log, $fh, $git) = @_; - - my $rel = $req->{relcmd}; - my %acache; - local $/ = "\0\0\n"; - my $nr = 0; - my (@parents, %seen); - while (defined(my $line = <$log>)) { - my ($id, $p, $s, $D, $ai, $an, $b) = split("\0", $line); - $seen{$id} = 1; - my @p = split(' ', $p); - push @parents, @p; - my $plinks; - if (@p == 1) { # typical, single-parent commit - $plinks = qq( / parent $p[0]); - } elsif (@p > 0) { # merge commit - $plinks = ' / parents ' . join(' ', map { - qq($_); - } @p); - } else { - $plinks = ''; # root commit - } - - $s = utf8_html($s); - $s = qq($s); - if ($D =~ /\AD(.+)/) { - $s .= ' ('. join(', ', git_dec_links($rel, $1)) . ')'; - } - - $an =~ s/\Aa//; - $b =~ s/\Ab//; - $b =~ s/\s*\z//s; - - my $ah = $acache{$an} ||= utf8_html($an); - my $nl = $b eq '' ? '' : "\n"; # empty bodies :< - $b = "$s\n- $ah @ $ai\n commit $id$plinks\n$nl" . - utf8_html($b); - $fh->write("\n\n" .$b); - ++$nr; - } - +sub git_log_sed_end ($$) { + my $req = $_[0]; + my $dst = delete $req->{lhtml} || ''; + $dst .= utf8_html($_[1]); # existing buffer + $dst .= '
';
 	my $m = '';
 	my $np = 0;
-	foreach my $p (@parents) {
-		next if $seen{$p};
-		$seen{$p} = ++$np;
+	my $seen = $req->{seen};
+	my $git = $req->{repo_info}->{git};
+	my $rel = $req->{relcmd};
+	foreach my $p (@{$req->{parents}}) {
+		next if $seen->{$p};
+		$seen->{$p} = ++$np;
 		my $s = git_commit_title($git, $p);
 		$m .= qq(\n$p\t);
 		$s = defined($s) ? utf8_html($s) : '';
 		$m .= qq($s);
 	}
-	my $foot = "

";
 	if ($np == 0) {
-		$foot .= "No commits follow";
+		$dst .= "No commits follow";
 	} elsif ($np > 1) {
-		$foot .= "Unseen parent commits to follow (multiple choice):\n";
+		$dst .= "Unseen parent commits to follow (multiple choice):\n";
 	} else {
-		$foot .= "Next parent to follow:\n";
+		$dst .= "Next parent to follow:\n";
 	}
-	$fh->write($foot .= $m . '
'); + $dst .= $m; + $dst .= ''; +} + +sub git_log_sed ($$) { + my ($self, $req) = @_; + my $buf = ''; + my $state = 'h'; + my %acache; + my $rel = $req->{relcmd}; + my $seen = $req->{seen} = {}; + my $parents = $req->{parents} = []; + my ($plinks, $id, $ai); + sub { + my $dst; + # $_[0] == scalar buffer, undef means EOF from "git log" + return git_log_sed_end($req, $buf) unless defined $_[0]; + $dst = delete $req->{lhtml} || ''; + my @tmp; + $buf .= $_[0]; + @tmp = split(/\0/, $buf, -1); + $buf = @tmp ? pop(@tmp) : ''; + + while (@tmp) { + if ($state eq 'b') { + my $bb = shift @tmp; + $state = 'B' if $bb =~ s/\Ab/\n/; + my @lines = split(/\n/, $bb); + $bb = utf8_html(pop @lines); + $dst .= utf8_html($_)."\n" for @lines; + $dst .= $bb; + } elsif ($state eq 'B') { + my $bb = shift @tmp; + if ($bb eq '') { + $state = 'BB'; + } else { + my @lines = split(/\n/, $bb); + $bb = undef; + my $last = utf8_html(pop @lines); + $dst .= utf8_html($_)."\n" for @lines; + $dst .= $last; + } + } elsif ($state eq 'BB') { + if ($tmp[0] =~ s/\A\n//s) { + $state = 'h'; + } else { + @tmp = (); + warn 'Bad state BB in log parser: ', + $req->{-debug}; + } + } elsif ($state eq 'h') { + if (scalar keys %$seen) { + $dst .= '
';
+				}
+				$id = shift @tmp;
+				$seen->{$id} = 1;
+				$state = 'p'
+			} elsif ($state eq 'p') {
+				my @p = split(/ /, shift @tmp);
+				push @$parents, @p;
+				$plinks = parent_links(@p);
+				$state = 's'
+			} elsif ($state eq 's') {
+				# FIXME: excessively long subjects OOM us
+				my $s = shift @tmp;
+				$dst .= qq();
+				$dst .= utf8_html($s);
+				$dst .= '';
+				$state = 'D'
+			} elsif ($state eq 'D') {
+				# FIXME: thousands of decorations may OOM us
+				my $D = shift @tmp;
+				if ($D =~ /\AD(.+)/) {
+					$dst .= ' (';
+					$dst .= join(', ',
+						git_dec_links($rel, $1));
+					$dst .= ')';
+				}
+				$state = 'ai';
+			} elsif ($state eq 'ai') {
+				$ai = shift @tmp;
+				$state = 'an';
+			} elsif ($state eq 'an') {
+				my $an = shift @tmp;
+				$an =~ s/\Aa// or
+					die "missing 'a' from author: $an";
+				my $ah = $acache{$an} ||= utf8_html($an);
+				$dst .= "\n- $ah @ $ai\n  commit $id$plinks\n";
+				$id = $plinks = $ai = '';
+				$state = 'b';
+			}
+		}
+
+		$dst;
+	};
+}
+
+# Entry point for the "log" view of a repository.
+#
+# Reads $req->{repo_info} and $req->{env}, then spawns "git log" through
+# PublicInbox::Qspawn so the output is streamed instead of slurped.
+# Side effects on $req:
+#   {'q'}    - the parsed PublicInbox::RepobrowseGitQuery
+#   {-debug} - human-readable description of the git command (diagnostics)
+#   {lhtml}  - HTML page header; consumed (deleted) by the git_log_sed filter
+# Returns whatever $qsp->psgi_return returns.
+sub call_git_log {
+	my ($self, $req) = @_;
+	my $repo_info = $req->{repo_info};
+
+	# per-repo commit limit; missing, zero or non-numeric falls back to 50
+	my $max = $repo_info->{max_commit_count} || 50;
+	$max = int($max);
+	$max = 50 if $max == 0;
+	my $env = $req->{env};
+	my $q = $req->{'q'} = PublicInbox::RepobrowseGitQuery->new($env);
+
+	# start the log from the requested revision, HEAD when none was given
+	my $h = $q->{h};
+	$h eq '' and $h = 'HEAD';
+	my $git = $repo_info->{git};
+	my $git_dir = $git->{git_dir};
+
+	# n.b. no need to escape $h, this -debug line will never
+	# be seen if $h is invalid
+	# XXX but we should probably validate refnames before execve...
+	# NOTE(review): $h comes from the request; a value starting with '-'
+	# is parsed by "git log" as an option, not a revision -- confirm and
+	# reject such values before spawning.
+	# Keep the argument order in sync with $cmd below so the diagnostic
+	# can be copy-pasted into a shell.
+	$req->{-debug} = "git --git-dir=$git_dir log $h --";
+	my $cmd = [ 'git', "--git-dir=$git_dir",
+			qw(log --no-notes --no-color --abbrev-commit),
+			$git->abbrev, $LOG_FMT, "-$max", $h, '--' ];
+
+	# redirect git's stderr (fd 2) so it can be reported via $git->err
+	my $rdr = { 2 => $git->err_begin };
+	my $title = "log: $repo_info->{repo} (" . utf8_html($h). ')';
+	$req->{lhtml} = $self->html_start($req, $title) . "\n\n";
+	my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr);
+
+	# $r is presumably the result of the first read from git's stdout:
+	# undef => read error, 0 => EOF without any output -- TODO confirm
+	# against PublicInbox::Qspawn::psgi_return
+	$qsp->psgi_return($env, undef, sub {
+		my ($r) = @_;
+		if (!defined $r) {
+			# git's stderr is unescaped text which may echo the
+			# request-supplied revision; never serve it as HTML
+			[ 500, [ 'Content-Type', 'text/plain' ], [ $git->err ] ];
+		} elsif ($r == 0) {
+			# same as above: plain text avoids reflected markup
+			[ 404, [ 'Content-Type', 'text/plain' ], [ $git->err ] ];
+		} else {
+			# got output: let git_log_sed turn the NUL-delimited
+			# log stream into HTML as it is read
+			$env->{'qspawn.filter'} = git_log_sed($self, $req);
+			[ 200, [ 'Content-Type', 'text/html' ] ];
+		}
+	});
 }
 
 1;
-- 
cgit v1.2.3-24-ge0c7