From 4c2c2325d2948ec5340e2fcafbee798cf568f5fd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Dec 2015 00:17:33 +0000 Subject: rename 'GitCatFile' package to 'Git' We'll be using it for more than just cat-file. Adding a `popen' API for internal use allows us to save a bunch of code in other places. --- lib/PublicInbox/ExtMsg.pm | 19 ++---- lib/PublicInbox/Feed.pm | 23 +++---- lib/PublicInbox/Git.pm | 142 ++++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/GitCatFile.pm | 132 --------------------------------------- lib/PublicInbox/Mbox.pm | 4 +- lib/PublicInbox/NNTP.pm | 2 +- lib/PublicInbox/NewsGroup.pm | 4 +- lib/PublicInbox/SearchIdx.pm | 18 ++---- lib/PublicInbox/SearchView.pm | 9 +-- lib/PublicInbox/View.pm | 4 +- lib/PublicInbox/WWW.pm | 16 +---- 11 files changed, 174 insertions(+), 199 deletions(-) create mode 100644 lib/PublicInbox/Git.pm delete mode 100644 lib/PublicInbox/GitCatFile.pm (limited to 'lib') diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 82f4c63d..0b667545 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -72,21 +72,10 @@ sub ext_msg { my $path = "HEAD:" . mid2path($mid); foreach my $n (@nox) { - my @cmd = ('git', "--git-dir=$n->{git_dir}", 'cat-file', - '-t', $path); - my $pid = open my $fh, '-|'; - defined $pid or die "fork failed: $!\n"; - - if ($pid == 0) { - open STDERR, '>', '/dev/null'; # ignore errors - exec @cmd or die "exec failed: $!\n"; - } else { - my $type = eval { local $/; <$fh> }; - close $fh; - if ($? 
== 0 && $type eq "blob\n") { - return r302($n->{url}, $mid); - } - } + # TODO: reuse existing PublicInbox::Git objects to save forks + my $git = PublicInbox::Git->new($n->{git_dir}); + my (undef, $type, undef) = $git->check($path); + return r302($n->{url}, $mid) if (defined $type && $type eq 'blob'); } # fall back to partial MID matching diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 68f1e67b..150bea03 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -9,7 +9,7 @@ use Email::Address; use Email::MIME; use Date::Parse qw(strptime); use PublicInbox::Hval; -use PublicInbox::GitCatFile; +use PublicInbox::Git; use PublicInbox::View; use PublicInbox::MID qw/mid_clean mid2path/; use POSIX qw/strftime/; @@ -66,7 +66,7 @@ sub emit_atom { my $max = $ctx->{max} || MAX_PER_PAGE; my $feed_opts = get_feedopts($ctx); my $x = atom_header($feed_opts); - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); each_recent_blob($ctx, sub { my ($path, undef, $ts) = @_; if (defined $x) { @@ -75,7 +75,6 @@ sub emit_atom { } add_to_feed($feed_opts, $fh, $path, $git); }); - $git = undef; # destroy pipes end_feed($fh); } @@ -105,11 +104,10 @@ sub emit_atom_thread { $feed_opts->{url} = $html_url; $feed_opts->{emit_header} = 1; - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); foreach my $msg (@{$res->{msgs}}) { add_to_feed($feed_opts, $fh, mid2path($msg->mid), $git); } - $git = undef; # destroy pipes end_feed($fh); } @@ -167,7 +165,7 @@ sub emit_html_index { sub emit_index_nosrch { my ($ctx, $state, $fh) = @_; - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); my (undef, $last) = each_recent_blob($ctx, sub { my ($path, $commit, $ts, $u, $subj) = @_; $state->{first} ||= $commit; @@ -219,14 +217,11 @@ sub each_recent_blob { # get recent messages # we 
could use git log -z, but, we already know ssoma will not # leave us with filenames with spaces in them.. - my @cmd = ('git', "--git-dir=$ctx->{git_dir}", - qw/log --no-notes --no-color --raw -r - --abbrev=16 --abbrev-commit/, - "--format=%h%x00%ct%x00%an%x00%s%x00"); - push @cmd, $range; - - my $pid = open(my $log, '-|', @cmd) or - die('open `'.join(' ', @cmd) . " pipe failed: $!\n"); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); + my $log = $git->popen(qw/log --no-notes --no-color --raw -r + --abbrev=16 --abbrev-commit/, + "--format=%h%x00%ct%x00%an%x00%s%x00", + $range); my %deleted; # only an optimization at this point my $last; my $nr = 0; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm new file mode 100644 index 00000000..5135862e --- /dev/null +++ b/lib/PublicInbox/Git.pm @@ -0,0 +1,142 @@ +# Copyright (C) 2014-2015 all contributors +# License: GPLv2 or later (https://www.gnu.org/licenses/gpl-2.0.txt) +# +# Used to read files from a git repository without excessive forking. +# Used in our web interfaces as well as our -nntpd server. +# This is based on code in Git.pm which is GPLv2, but modified to avoid +# dependence on environment variables for compatibility with mod_perl. +# There are also API changes to simplify our usage and data set. 
+package PublicInbox::Git; +use strict; +use warnings; +use POSIX qw(dup2); +require IO::Handle; + +sub new { + my ($class, $git_dir) = @_; + bless { git_dir => $git_dir }, $class +} + +sub _bidi_pipe { + my ($self, $batch, $in, $out, $pid) = @_; + return if $self->{$pid}; + my ($in_r, $in_w, $out_r, $out_w); + + pipe($in_r, $in_w) or fail($self, "pipe failed: $!"); + pipe($out_r, $out_w) or fail($self, "pipe failed: $!"); + + my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); + $self->{$pid} = fork; + defined $self->{$pid} or fail($self, "fork failed: $!"); + if ($self->{$pid} == 0) { + dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n"; + dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n"; + exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n"; + } + close $out_r or fail($self, "close failed: $!"); + close $in_w or fail($self, "close failed: $!"); + $out_w->autoflush(1); + $self->{$out} = $out_w; + $self->{$in} = $in_r; +} + +sub cat_file { + my ($self, $obj, $ref) = @_; + + $self->_bidi_pipe(qw(--batch in out pid)); + $self->{out}->print($obj, "\n") or fail($self, "write error: $!"); + + my $in = $self->{in}; + my $head = $in->getline; + $head =~ / missing$/ and return undef; + $head =~ /^[0-9a-f]{40} \S+ (\d+)$/ or + fail($self, "Unexpected result from git cat-file: $head"); + + my $size = $1; + my $ref_type = $ref ? ref($ref) : ''; + + my $rv; + my $left = $size; + $$ref = $size if ($ref_type eq 'SCALAR'); + my $cb_err; + + if ($ref_type eq 'CODE') { + $rv = eval { $ref->($in, \$left) }; + $cb_err = $@; + # drain the rest + my $max = 8192; + while ($left > 0) { + my $r = read($in, my $x, $left > $max ? 
$max : $left); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + } + } else { + my $offset = 0; + my $buf = ''; + while ($left > 0) { + my $r = read($in, $buf, $left, $offset); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + $offset += $r; + } + $rv = \$buf; + } + + my $r = read($in, my $buf, 1); + defined($r) or fail($self, "read failed: $!"); + fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); + die $cb_err if $cb_err; + + $rv; +} + +sub check { + my ($self, $obj) = @_; + $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); + $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!"); + chomp(my $line = $self->{in_c}->getline); + my ($hex, $type, $size) = split(' ', $line); + return if $type eq 'missing'; + ($hex, $type, $size); +} + +sub _destroy { + my ($self, $in, $out, $pid) = @_; + my $p = $self->{$pid} or return; + $self->{$pid} = undef; + foreach my $f ($in, $out) { + my $fh = $self->{$f}; + defined $fh or next; + close $fh; + $self->{$f} = undef; + } + waitpid $p, 0; +} + +sub fail { + my ($self, $msg) = @_; + cleanup($self); + die $msg; +} + +sub popen { + my ($self, @cmd) = @_; + my $mode = '-|'; + $mode = shift @cmd if ($cmd[0] eq '|-'); + @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd); + my $pid = open my $fh, $mode, @cmd or + die('open `'.join(' ', @cmd) . 
" pipe failed: $!\n"); + $fh; +} + +sub cleanup { + my ($self) = @_; + _destroy($self, qw(in out pid)); + _destroy($self, qw(in_c out_c pid_c)); +} + +sub DESTROY { cleanup(@_) } + +1; diff --git a/lib/PublicInbox/GitCatFile.pm b/lib/PublicInbox/GitCatFile.pm deleted file mode 100644 index b3666a08..00000000 --- a/lib/PublicInbox/GitCatFile.pm +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (C) 2014-2015 all contributors -# License: GPLv2 or later (https://www.gnu.org/licenses/gpl-2.0.txt) -# -# Used to read files from a git repository without excessive forking. -# Used in our web interfaces as well as our -nntpd server. -# This is based on code in Git.pm which is GPLv2, but modified to avoid -# dependence on environment variables for compatibility with mod_perl. -# There are also API changes to simplify our usage and data set. -package PublicInbox::GitCatFile; -use strict; -use warnings; -use POSIX qw(dup2); -require IO::Handle; - -sub new { - my ($class, $git_dir) = @_; - bless { git_dir => $git_dir }, $class -} - -sub _bidi_pipe { - my ($self, $batch, $in, $out, $pid) = @_; - return if $self->{$pid}; - my ($in_r, $in_w, $out_r, $out_w); - - pipe($in_r, $in_w) or fail($self, "pipe failed: $!"); - pipe($out_r, $out_w) or fail($self, "pipe failed: $!"); - - my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); - $self->{$pid} = fork; - defined $self->{$pid} or fail($self, "fork failed: $!"); - if ($self->{$pid} == 0) { - dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n"; - dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n"; - exec(@cmd) or die 'exec `' . join(' '). 
"' failed: $!\n"; - } - close $out_r or fail($self, "close failed: $!"); - close $in_w or fail($self, "close failed: $!"); - $out_w->autoflush(1); - $self->{$out} = $out_w; - $self->{$in} = $in_r; -} - -sub cat_file { - my ($self, $obj, $ref) = @_; - - $self->_bidi_pipe(qw(--batch in out pid)); - $self->{out}->print($obj, "\n") or fail($self, "write error: $!"); - - my $in = $self->{in}; - my $head = $in->getline; - $head =~ / missing$/ and return undef; - $head =~ /^[0-9a-f]{40} \S+ (\d+)$/ or - fail($self, "Unexpected result from git cat-file: $head"); - - my $size = $1; - my $ref_type = $ref ? ref($ref) : ''; - - my $rv; - my $left = $size; - $$ref = $size if ($ref_type eq 'SCALAR'); - my $cb_err; - - if ($ref_type eq 'CODE') { - $rv = eval { $ref->($in, \$left) }; - $cb_err = $@; - # drain the rest - my $max = 8192; - while ($left > 0) { - my $r = read($in, my $x, $left > $max ? $max : $left); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - } - } else { - my $offset = 0; - my $buf = ''; - while ($left > 0) { - my $r = read($in, $buf, $left, $offset); - defined($r) or fail($self, "read failed: $!"); - $r == 0 and fail($self, 'exited unexpectedly'); - $left -= $r; - $offset += $r; - } - $rv = \$buf; - } - - my $r = read($in, my $buf, 1); - defined($r) or fail($self, "read failed: $!"); - fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); - die $cb_err if $cb_err; - - $rv; -} - -sub check { - my ($self, $obj) = @_; - $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); - $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!"); - chomp(my $line = $self->{in_c}->getline); - my ($hex, $type, $size) = split(' ', $line); - return if $type eq 'missing'; - ($hex, $type, $size); -} - -sub _destroy { - my ($self, $in, $out, $pid) = @_; - my $p = $self->{$pid} or return; - $self->{$pid} = undef; - foreach my $f ($in, $out) { - my $fh = $self->{$f}; - defined $fh or next; - 
close $fh; - $self->{$f} = undef; - } - waitpid $p, 0; -} - -sub fail { - my ($self, $msg) = @_; - cleanup($self); - die $msg; -} - -sub cleanup { - my ($self) = @_; - _destroy($self, qw(in out pid)); - _destroy($self, qw(in_c out_c pid_c)); -} - -sub DESTROY { cleanup(@_) } - -1; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index c180a0dc..0d67981f 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -86,9 +86,9 @@ sub emit_mbox { my $fh = $response->([200, ['Content-Type' => "application/$type"]]); $fh = PublicInbox::MboxGz->new($fh) if $sfx; - require PublicInbox::GitCatFile; + require PublicInbox::Git; my $mid = $ctx->{mid}; - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); my %opts = (offset => 0); my $nr; do { diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 295aee0e..097c57e9 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -9,7 +9,7 @@ use base qw(Danga::Socket); use fields qw(nntpd article rbuf ng long_res); use PublicInbox::Search; use PublicInbox::Msgmap; -use PublicInbox::GitCatFile; +use PublicInbox::Git; use PublicInbox::MID qw(mid2path); use Email::MIME; use Data::Dumper qw(Dumper); diff --git a/lib/PublicInbox/NewsGroup.pm b/lib/PublicInbox/NewsGroup.pm index 3a318958..b20180e6 100644 --- a/lib/PublicInbox/NewsGroup.pm +++ b/lib/PublicInbox/NewsGroup.pm @@ -10,7 +10,7 @@ use Scalar::Util qw(weaken); require Danga::Socket; require PublicInbox::Msgmap; require PublicInbox::Search; -require PublicInbox::GitCatFile; +require PublicInbox::Git; sub new { my ($class, $name, $git_dir, $address) = @_; @@ -32,7 +32,7 @@ sub defer_weaken { sub gcf { my ($self) = @_; $self->{gcf} ||= eval { - my $gcf = PublicInbox::GitCatFile->new($self->{git_dir}); + my $gcf = PublicInbox::Git->new($self->{git_dir}); # git repos may be repacked and old packs unlinked defer_weaken($self, 'gcf'); diff --git 
a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index e9af547f..67272997 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -11,6 +11,7 @@ use strict; use warnings; use base qw(PublicInbox::Search); use PublicInbox::MID qw/mid_clean id_compress/; +require PublicInbox::Git; *xpfx = *PublicInbox::Search::xpfx; use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian @@ -331,16 +332,11 @@ sub rlog { my $h40 = $hex .'{40}'; my $addmsg = qr!^:000000 100644 \S+ ($h40) A\t${hex}{2}/${hex}{38}$!; my $delmsg = qr!^:100644 000000 ($h40) \S+ D\t${hex}{2}/${hex}{38}$!; - my $git_dir = $self->{git_dir}; - require PublicInbox::GitCatFile; - my $git = PublicInbox::GitCatFile->new($git_dir); - my @cmd = ('git', "--git-dir=$git_dir", "log", - qw/--reverse --no-notes --no-color --raw -r --no-abbrev/, - $range); + my $git = PublicInbox::Git->new($self->{git_dir}); + my $log = $git->popen(qw/log --reverse --no-notes --no-color + --raw -r --no-abbrev/, $range); my $latest; my $bytes; - my $pid = open(my $log, '-|', @cmd) or - die('open` '.join(' ', @cmd) . " pipe failed: $!\n"); while (my $line = <$log>) { if ($line =~ /$addmsg/o) { my $mime = do_cat_mail($git, $1, \$bytes) or next; @@ -447,10 +443,8 @@ sub merge_threads { sub _read_git_config_perm { my ($self) = @_; - my @cmd = ('git', "--git-dir=$self->{git_dir}", - qw(config core.sharedRepository)); - my $pid = open(my $fh, '-|', @cmd) or - die('open `'.join(' ', @cmd) . 
" pipe failed: $!\n"); + my @cmd = qw(config core.sharedRepository); + my $fh = PublicInbox::Git->new($self->{git_dir})->popen(@cmd); my $perm = <$fh>; close $fh; chomp $perm if defined $perm; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index ea8a45a4..fec4f39f 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -10,6 +10,7 @@ use PublicInbox::Hval; use PublicInbox::View; use PublicInbox::MID qw(mid2path mid_clean); use Email::MIME; +require PublicInbox::Git; our $LIM = 50; sub sres_top_html { @@ -169,12 +170,10 @@ sub tdump { $th->order(*PublicInbox::View::rsort_ts); } - require PublicInbox::GitCatFile; - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); my $state = { ctx => $ctx, anchor_idx => 0, pct => \%pct }; $ctx->{searchview} = 1; tdump_ent($fh, $git, $state, $_, 0) for $th->rootset; - $git = undef; Email::Address->purge_cache; $fh->write(search_nav_bot($mset, $q). "\n\n" . 
@@ -236,8 +235,7 @@ sub html_start { sub adump { my ($cb, $mset, $q, $ctx) = @_; my $fh = $cb->([ 200, ['Content-Type' => 'application/atom+xml']]); - require PublicInbox::GitCatFile; - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); my $feed_opts = PublicInbox::Feed::get_feedopts($ctx); my $x = PublicInbox::Hval->new_oneline($q->{q})->as_html; $x = qq{$x - search results}; @@ -251,7 +249,6 @@ sub adump { $x = mid2path($x); PublicInbox::Feed::add_to_feed($feed_opts, $fh, $x, $git); } - $git = undef; PublicInbox::Feed::end_feed($fh); } diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 68741c5e..2ca7f95c 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -183,8 +183,8 @@ sub emit_thread_html { anchor_idx => 0, }; - require PublicInbox::GitCatFile; - my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + require PublicInbox::Git; + my $git = $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir}); if ($flat) { pre_anchor_entry($seen, $_) for (@$msgs); __thread_entry(\$cb, $git, $state, $_, 0) for (@$msgs); diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 5cd3bc6f..ee414e85 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -17,6 +17,7 @@ use PublicInbox::Config qw(try_cat); use URI::Escape qw(uri_escape_utf8 uri_unescape); use constant SSOMA_URL => 'http://ssoma.public-inbox.org/'; use constant PI_URL => 'http://public-inbox.org/'; +require PublicInbox::Git; our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; @@ -62,7 +63,6 @@ sub preload { require PublicInbox::Feed; require PublicInbox::View; require PublicInbox::Thread; - require PublicInbox::GitCatFile; require Email::MIME; require Digest::SHA; require POSIX; @@ -96,6 +96,7 @@ sub invalid_list { my $git_dir = $pi_config->get($listname, "mainrepo"); if (defined $git_dir) { $ctx->{git_dir} 
= $git_dir; + $ctx->{git} = PublicInbox::Git->new($git_dir); $ctx->{listname} = $listname; return; } @@ -146,18 +147,7 @@ sub mid2blob { my ($ctx) = @_; require PublicInbox::MID; my $path = PublicInbox::MID::mid2path($ctx->{mid}); - my @cmd = ('git', "--git-dir=$ctx->{git_dir}", - qw(cat-file blob), "HEAD:$path"); - my $pid = open my $fh, '-|'; - defined $pid or die "fork failed: $!\n"; - if ($pid == 0) { - open STDERR, '>', '/dev/null'; # ignore errors - exec @cmd or die "exec failed: $!\n"; - } else { - my $blob = eval { local $/; <$fh> }; - close $fh; - $? == 0 ? \$blob : undef; - } + $ctx->{git}->cat_file("HEAD:$path"); } # /$LISTNAME/$MESSAGE_ID/raw -> raw mbox -- cgit v1.2.3-24-ge0c7