about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2017-01-13 02:13:18 +0000
committerEric Wong <e@80x24.org>2017-01-13 07:16:32 +0000
commitbec2d26785b20793de0169b30d0af67fd20fcd4e (patch)
tree0641f3d541b49a1b97244bf438dd46a108f73cb3
parent0eaf06c1c2acc65ebc7e45d0d4913958264c3dd1 (diff)
downloadpublic-inbox-bec2d26785b20793de0169b30d0af67fd20fcd4e.tar.gz
This will prevent too many processes from being spawned at once
while also allowing us to respond to backpressure from slow
clients.
-rw-r--r--lib/PublicInbox/RepobrowseGitLog.pm227
1 files changed, 156 insertions, 71 deletions
diff --git a/lib/PublicInbox/RepobrowseGitLog.pm b/lib/PublicInbox/RepobrowseGitLog.pm
index 44b405bc..e62486ba 100644
--- a/lib/PublicInbox/RepobrowseGitLog.pm
+++ b/lib/PublicInbox/RepobrowseGitLog.pm
@@ -8,95 +8,180 @@ use warnings;
 use PublicInbox::Hval qw(utf8_html);
 use base qw(PublicInbox::RepobrowseBase);
 use PublicInbox::RepobrowseGit qw(git_dec_links git_commit_title);
+use PublicInbox::Qspawn;
 # cannot rely on --date=format-local:... yet, it is too new (September 2015)
 my $LOG_FMT = '--pretty=tformat:'.
                 join('%x00', qw(%h %p %s D%D %ai a%an b%b), '', '');
 
-sub call_git_log {
-        my ($self, $req) = @_;
-        my $repo_info = $req->{repo_info};
-        my $max = $repo_info->{max_commit_count} || 50;
-        $max = int($max);
-        $max = 50 if $max == 0;
-
-        my $q = PublicInbox::RepobrowseGitQuery->new($req->{env});
-        my $h = $q->{h};
-        $h eq '' and $h = 'HEAD';
-
-        my $git = $repo_info->{git};
-        my $log = $git->popen(qw(log --no-notes --no-color --abbrev-commit),
-                                $git->abbrev, $LOG_FMT, "-$max", $h, '--');
-        sub {
-                my ($res) = @_; # Plack callback
-                my $fh = $res->([200, ['Content-Type'=>'text/html']]);
-                my $title = "log: $repo_info->{repo} ".utf8_html("($h)");
-                $fh->write($self->html_start($req, $title));
-                git_log_stream($req, $q, $log, $fh, $git);
-                $fh->close;
+sub parent_links { # returns an HTML fragment linking to each parent id in @_ ('' when there are none)
+        if (@_ == 1) { # typical, single-parent commit
+                qq( / parent <a\nhref="#p$_[0]">$_[0]</a>);
+        } elsif (@_ > 0) { # merge commit
+                ' / parents ' .
+                        join(' ', map { qq(<a\nhref="#p$_">$_</a>) } @_);
+        } else {
+                ''; # root commit
         }
 }
 
-sub git_log_stream {
-        my ($req, $q, $log, $fh, $git) = @_;
-
-        my $rel = $req->{relcmd};
-        my %acache;
-        local $/ = "\0\0\n";
-        my $nr = 0;
-        my (@parents, %seen);
-        while (defined(my $line = <$log>)) {
-                my ($id, $p, $s, $D, $ai, $an, $b) = split("\0", $line);
-                $seen{$id} = 1;
-                my @p = split(' ', $p);
-                push @parents, @p;
-                my $plinks;
-                if (@p == 1) { # typical, single-parent commit
-                        $plinks = qq( / parent <a\nhref="#p$p[0]">$p[0]</a>);
-                } elsif (@p > 0) { # merge commit
-                        $plinks = ' / parents ' . join(' ', map {
-                                qq(<a\nhref="#p$_">$_</a>);
-                                } @p);
-                } else {
-                        $plinks = ''; # root commit
-                }
-
-                $s = utf8_html($s);
-                $s = qq(<a\nid=p$id\nhref="${rel}commit?id=$id"><b>$s</b></a>);
-                if ($D =~ /\AD(.+)/) {
-                        $s .= ' ('. join(', ', git_dec_links($rel, $1)) . ')';
-                }
-
-                $an =~ s/\Aa//;
-                $b =~ s/\Ab//;
-                $b =~ s/\s*\z//s;
-
-                my $ah = $acache{$an} ||= utf8_html($an);
-                my $nl = $b eq '' ? '' : "\n"; # empty bodies :<
-                $b = "$s\n- $ah @ $ai\n  commit $id$plinks\n$nl" .
-                        utf8_html($b);
-                $fh->write("\n\n" .$b);
-                ++$nr;
-        }
-
+sub git_log_sed_end ($$) { # EOF handler: ($req, leftover buffer) => final HTML (remaining text + footer)
+        my $req = $_[0];
+        my $dst = delete $req->{lhtml} || ''; # page header, if no earlier filter call already consumed it
+        $dst .= utf8_html($_[1]); # existing buffer (unparsed tail held back by the filter)
+        $dst .= '</pre><hr /><pre>';
         my $m = '';
         my $np = 0;
-        foreach my $p (@parents) {
-                next if $seen{$p};
-                $seen{$p} = ++$np;
+        my $seen = $req->{seen}; # ids of commits already shown, filled in by git_log_sed
+        my $git = $req->{repo_info}->{git};
+        my $rel = $req->{relcmd};
+        foreach my $p (@{$req->{parents}}) { # every parent id collected while parsing
+                next if $seen->{$p}; # already shown on this page (or already listed below)
+                $seen->{$p} = ++$np; # count it and mark it so duplicates are listed once
                 my $s = git_commit_title($git, $p);
                 $m .= qq(\n<a\nid=p$p\nhref="?h=$p">$p</a>\t);
                 $s = defined($s) ? utf8_html($s) : '';
                 $m .= qq(<a\nhref="${rel}commit?id=$p">$s</a>);
         }
-        my $foot = "</pre><hr /><pre>";
         if ($np == 0) {
-                $foot .= "No commits follow";
+                $dst .= "No commits follow";
         } elsif ($np > 1) {
-                $foot .= "Unseen parent commits to follow (multiple choice):\n";
+                $dst .= "Unseen parent commits to follow (multiple choice):\n";
         } else {
-                $foot .= "Next parent to follow:\n";
+                $dst .= "Next parent to follow:\n";
         }
-        $fh->write($foot .= $m . '</pre></body></html>');
+        $dst .= $m;
+        $dst .= '</pre></body></html>'; # last expression: its value ($dst) is the return value
+}
+
+sub git_log_sed ($$) { # returns a closure turning chunks of NUL-delimited "git log" output ($LOG_FMT) into HTML
+        my ($self, $req) = @_;
+        my $buf = ''; # unparsed tail carried over between calls
+        my $state = 'h'; # next $LOG_FMT field expected: h p s D ai an b; B/BB track the end of a record
+        my %acache; # author name => escaped HTML, so repeated authors are escaped once
+        my $rel = $req->{relcmd};
+        my $seen = $req->{seen} = {}; # shared with git_log_sed_end through $req
+        my $parents = $req->{parents} = []; # shared with git_log_sed_end through $req
+        my ($plinks, $id, $ai); # per-commit fields held until the author field arrives
+        sub {
+                my $dst;
+                # $_[0] == scalar buffer, undef means EOF from "git log"
+                return git_log_sed_end($req, $buf) unless defined $_[0];
+                $dst = delete $req->{lhtml} || ''; # first call emits the header stashed in $req->{lhtml}
+                my @tmp;
+                $buf .= $_[0];
+                @tmp = split(/\0/, $buf, -1); # limit -1 keeps trailing empty fields
+                $buf = @tmp ? pop(@tmp) : ''; # last field may be incomplete: hold it for the next chunk
+
+                while (@tmp) {
+                        if ($state eq 'b') {
+                                my $bb = shift @tmp;
+                                $state = 'B' if $bb =~ s/\Ab/\n/; # swap the "b" marker for a newline
+                                my @lines = split(/\n/, $bb);
+                                $bb = utf8_html(pop @lines); # NOTE(review): empty body => @lines is empty, pop is undef; confirm utf8_html accepts undef
+                                $dst .= utf8_html($_)."\n" for @lines;
+                                $dst .= $bb;
+                        } elsif ($state eq 'B') {
+                                my $bb = shift @tmp;
+                                if ($bb eq '') { # empty field between the two NULs that end a record
+                                        $state = 'BB';
+                                } else {
+                                        my @lines = split(/\n/, $bb);
+                                        $bb = undef;
+                                        my $last = utf8_html(pop @lines);
+                                        $dst .= utf8_html($_)."\n" for @lines;
+                                        $dst .= $last;
+                                }
+                        } elsif ($state eq 'BB') {
+                                if ($tmp[0] =~ s/\A\n//s) { # strip the record-ending newline in place; the rest is the next id
+                                        $state = 'h';
+                                } else {
+                                        @tmp = (); # drop the rest of this chunk; state stays BB
+                                        warn 'Bad state BB in log parser: ',
+                                                $req->{-debug};
+                                }
+                        } elsif ($state eq 'h') {
+                                if (scalar keys %$seen) { # not the first commit: emit a separator
+                                        $dst .= '</pre><hr /><pre>';
+                                }
+                                $id = shift @tmp;
+                                $seen->{$id} = 1;
+                                $state = 'p'
+                        } elsif ($state eq 'p') {
+                                my @p = split(/ /, shift @tmp);
+                                push @$parents, @p; # remembered for the footer built at EOF
+                                $plinks = parent_links(@p);
+                                $state = 's'
+                        } elsif ($state eq 's') {
+                                # FIXME: excessively long subjects OOM us
+                                my $s = shift @tmp;
+                                $dst .= qq(<a\nid=p$id\n);
+                                $dst .= qq(href="${rel}commit?id=$id"><b>);
+                                $dst .= utf8_html($s);
+                                $dst .= '</b></a>';
+                                $state = 'D'
+                        } elsif ($state eq 'D') {
+                                # FIXME: thousands of decorations may OOM us
+                                my $D = shift @tmp;
+                                if ($D =~ /\AD(.+)/) { # a bare "D" means no decorations
+                                        $dst .= ' (';
+                                        $dst .= join(', ',
+                                                git_dec_links($rel, $1));
+                                        $dst .= ')';
+                                }
+                                $state = 'ai';
+                        } elsif ($state eq 'ai') {
+                                $ai = shift @tmp; # held until the author line is written
+                                $state = 'an';
+                        } elsif ($state eq 'an') {
+                                my $an = shift @tmp;
+                                $an =~ s/\Aa// or
+                                        die "missing 'a' from author: $an";
+                                my $ah = $acache{$an} ||= utf8_html($an);
+                                $dst .= "\n- $ah @ $ai\n  commit $id$plinks\n";
+                                $id = $plinks = $ai = ''; # reset per-commit fields
+                                $state = 'b';
+                        }
+                }
+
+                $dst; # HTML produced from this chunk (may be '')
+        };
+}
+
+sub call_git_log { # runs "git log" via PublicInbox::Qspawn; returns the result of $qsp->psgi_return
+        my ($self, $req) = @_;
+        my $repo_info = $req->{repo_info};
+        my $max = $repo_info->{max_commit_count} || 50; # commits per page, 50 when unset
+        $max = int($max);
+        $max = 50 if $max == 0; # int() gave zero: fall back to the default
+        my $env = $req->{env};
+        my $q = $req->{'q'} = PublicInbox::RepobrowseGitQuery->new($env);
+        my $h = $q->{h};
+        $h eq '' and $h = 'HEAD'; # no ?h= given: start from HEAD
+        my $git = $repo_info->{git};
+        my $git_dir = $git->{git_dir};
+
+        # n.b. no need to escape $h, this -debug line will never
+        # be seen if $h is invalid
+        # XXX but we should probably validate refnames before execve...
+        $req->{-debug} = "git log --git-dir=$git_dir $h --";
+        my $cmd = [ 'git', "--git-dir=$git_dir",
+                        qw(log --no-notes --no-color --abbrev-commit),
+                        $git->abbrev, $LOG_FMT, "-$max", $h, '--' ];
+        my $rdr = { 2 => $git->err_begin }; # presumably redirects the child's stderr (fd 2) for $git->err; confirm
+        my $title = "log: $repo_info->{repo} (" . utf8_html($h). ')';
+        $req->{lhtml} = $self->html_start($req, $title) . "\n\n"; # header, emitted later by the filter
+        my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr);
+        $qsp->psgi_return($env, undef, sub {
+                my ($r) = @_; # undef => 500, 0 => 404, anything else => 200 (see branches below)
+                if (!defined $r) {
+                        [ 500, [ 'Content-Type', 'text/html' ], [ $git->err ] ]; # NOTE(review): sent unescaped as HTML; confirm it cannot echo $h
+                } elsif ($r == 0) {
+                        [ 404, [ 'Content-Type', 'text/html' ], [ $git->err ] ]; # NOTE(review): same escaping concern as the 500 branch
+                } else {
+                        $env->{'qspawn.filter'} = git_log_sed($self, $req); # install the filter closure built by git_log_sed
+                        [ 200, [ 'Content-Type', 'text/html' ] ];
+                }
+        });
+}
 
 1;