From b140961420c0f240c9c3f55e83c52cfc3efa709d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 17 Dec 2015 05:37:31 +0000 Subject: git: cat-file wrapper enhancements The "cat_file" sub now allows a block to be passed for partial processing. Additionally, a new "check" method is added to retrieve only object metadata: (SHA-1 identifier, type, size) --- lib/PublicInbox/GitCatFile.pm | 125 ++++++++++++++++++++++++++------------- t/git.fast-import-data | 101 +++++++++++++++++++++++++++++++ t/git.t | 134 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 318 insertions(+), 42 deletions(-) create mode 100644 t/git.fast-import-data create mode 100644 t/git.t diff --git a/lib/PublicInbox/GitCatFile.pm b/lib/PublicInbox/GitCatFile.pm index dd95d5f3..b3666a08 100644 --- a/lib/PublicInbox/GitCatFile.pm +++ b/lib/PublicInbox/GitCatFile.pm @@ -17,75 +17,116 @@ sub new { bless { git_dir => $git_dir }, $class } -sub _cat_file_begin { - my ($self) = @_; - return if $self->{pid}; +sub _bidi_pipe { + my ($self, $batch, $in, $out, $pid) = @_; + return if $self->{$pid}; my ($in_r, $in_w, $out_r, $out_w); - pipe($in_r, $in_w) or die "pipe failed: $!\n"; - pipe($out_r, $out_w) or die "pipe failed: $!\n"; + pipe($in_r, $in_w) or fail($self, "pipe failed: $!"); + pipe($out_r, $out_w) or fail($self, "pipe failed: $!"); - my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file --batch)); - my $pid = fork; - defined $pid or die "fork failed: $!\n"; - if ($pid == 0) { + my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); + $self->{$pid} = fork; + defined $self->{$pid} or fail($self, "fork failed: $!"); + if ($self->{$pid} == 0) { dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n"; dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n"; exec(@cmd) or die 'exec `' . join(' '). "' failed: $!\n"; } - close $out_r or die "close failed: $!\n"; - close $in_w or die "close failed: $!\n"; + close $out_r or fail($self, "close failed: $!"); + close $in_w or fail($self, "close failed: $!"); $out_w->autoflush(1); - - $self->{in} = $in_r; - $self->{out} = $out_w; - $self->{pid} = $pid; + $self->{$out} = $out_w; + $self->{$in} = $in_r; } sub cat_file { - my ($self, $object, $sizeref) = @_; + my ($self, $obj, $ref) = @_; - $self->_cat_file_begin; - print { $self->{out} } $object, "\n" or die "pipe write error: $!\n"; + $self->_bidi_pipe(qw(--batch in out pid)); + $self->{out}->print($obj, "\n") or fail($self, "write error: $!"); my $in = $self->{in}; - my $head = <$in>; + my $head = $in->getline; $head =~ / missing$/ and return undef; $head =~ /^[0-9a-f]{40} \S+ (\d+)$/ or - die "Unexpected result from git cat-file: $head\n"; + fail($self, "Unexpected result from git cat-file: $head"); my $size = $1; - $$sizeref = $size if $sizeref; - my $bytes_left = $size; - my $offset = 0; - my $rv = ''; - - while ($bytes_left) { - my $read = read($in, $rv, $bytes_left, $offset); - defined($read) or die "sysread pipe failed: $!\n"; - $bytes_left -= $read; - $offset += $read; - } + my $ref_type = $ref ? ref($ref) : ''; + + my $rv; + my $left = $size; + $$ref = $size if ($ref_type eq 'SCALAR'); + my $cb_err; - my $read = read($in, my $buf, 1); - defined($read) or die "read pipe failed: $!\n"; - if ($read != 1 || $buf ne "\n") { - die "newline missing after blob\n"; + if ($ref_type eq 'CODE') { + $rv = eval { $ref->($in, \$left) }; + $cb_err = $@; + # drain the rest + my $max = 8192; + while ($left > 0) { + my $r = read($in, my $x, $left > $max ? $max : $left); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + } + } else { + my $offset = 0; + my $buf = ''; + while ($left > 0) { + my $r = read($in, $buf, $left, $offset); + defined($r) or fail($self, "read failed: $!"); + $r == 0 and fail($self, 'exited unexpectedly'); + $left -= $r; + $offset += $r; + } + $rv = \$buf; } - \$rv; + + my $r = read($in, my $buf, 1); + defined($r) or fail($self, "read failed: $!"); + fail($self, 'newline missing after blob') if ($r != 1 || $buf ne "\n"); + die $cb_err if $cb_err; + + $rv; } -sub DESTROY { - my ($self) = @_; - my $pid = $self->{pid} or return; - $self->{pid} = undef; - foreach my $f (qw(in out)) { +sub check { + my ($self, $obj) = @_; + $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); + $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!"); + chomp(my $line = $self->{in_c}->getline); + my ($hex, $type, $size) = split(' ', $line); + return if $type eq 'missing'; + ($hex, $type, $size); +} + +sub _destroy { + my ($self, $in, $out, $pid) = @_; + my $p = $self->{$pid} or return; + $self->{$pid} = undef; + foreach my $f ($in, $out) { my $fh = $self->{$f}; defined $fh or next; close $fh; $self->{$f} = undef; } - waitpid $pid, 0; + waitpid $p, 0; +} + +sub fail { + my ($self, $msg) = @_; + cleanup($self); + die $msg; } +sub cleanup { + my ($self) = @_; + _destroy($self, qw(in out pid)); + _destroy($self, qw(in_c out_c pid_c)); +} + +sub DESTROY { cleanup(@_) } + 1; diff --git a/t/git.fast-import-data b/t/git.fast-import-data new file mode 100644 index 00000000..4a105ee7 --- /dev/null +++ b/t/git.fast-import-data @@ -0,0 +1,101 @@ +blob +mark :1 +data 6 +hello + +reset refs/heads/header +commit refs/heads/header +mark :2 +author AU Thor 0 +0000 +committer AU Thor 0 +0000 +data 8 +initial +M 100644 :1 foo.txt + +blob +mark :3 +data 12 +hello +world + +commit refs/heads/master +mark :4 +author AU Thor 0 +0000 +committer AU Thor 0 +0000 +data 7 +second +from :2 +M 100644 :3 foo.txt + +blob +mark :5 +data 12 +----- +hello + +commit refs/heads/header +mark :6 +author AU Thor 0 +0000 +committer AU Thor 0 +0000 +data 11 +add header +from :2 +M 100644 :5 foo.txt + +blob +mark :7 +data 18 +----- +hello +world + +commit refs/heads/master +mark :8 +author AU Thor 0 +0000 +committer AU Thor 0 +0000 +data 46 +Merge branch 'header' + +* header: + add header +from :4 +merge :6 +M 100644 :7 foo.txt + +blob +mark :9 +data 0 + +blob +mark :10 +data 16 +dir/dur/der/derp +commit refs/heads/master +mark :11 +author AU Thor 0 +0000 +committer AU Thor 0 +0000 +data 26 +add symlink and deep file +from :8 +M 100644 :9 dir/dur/der/derp +M 120000 :10 link + +blob +mark :12 +data 78 +[submodule "git"] + path = git + url = git://git.kernel.org/pub/scm/git/git.git + +commit refs/heads/master +mark :13 +author AU Thor 0 +0000 +committer AU Thor 0 +0000 +data 18 +add git submodule +from :11 +M 100644 :12 .gitmodules +M 160000 f3adf457e046f92f039353762a78dcb3afb2cb13 git + +reset refs/heads/master +from :13 diff --git a/t/git.t b/t/git.t new file mode 100644 index 00000000..4532921d --- /dev/null +++ b/t/git.t @@ -0,0 +1,134 @@ +# Copyright (C) 2015 all contributors +# License: AGPL-3.0+ (https://www.gnu.org/licenses/agpl-3.0.txt) +use strict; +use warnings; +use Test::More; +use File::Temp qw/tempdir/; +my $dir = tempdir(CLEANUP => 1); +use Cwd qw/getcwd/; + +use_ok 'PublicInbox::GitCatFile'; +{ + is(system(qw(git init -q --bare), $dir), 0, 'created git directory'); + my @cmd = ('git', "--git-dir=$dir", 'fast-import', '--quiet'); + + my $fi_data = getcwd().'/t/git.fast-import-data'; + ok(-r $fi_data, "fast-import data readable (or run test at top level)"); + my $pid = fork; + defined $pid or die "fork failed: $!\n"; + if ($pid == 0) { + open STDIN, '<', $fi_data or die "open $fi_data: $!\n"; + exec @cmd; + die "failed exec: ",join(' ', @cmd),": $!\n"; + } + waitpid $pid, 0; + is($?, 0, 'fast-import succeeded'); +} + +{ + my $gcf = PublicInbox::GitCatFile->new($dir); + my $f = 'HEAD:foo.txt'; + my @x = $gcf->check($f); + is(scalar @x, 3, 'returned 3 element array for existing file'); + like($x[0], qr/\A[a-f0-9]{40}\z/, 'returns obj ID in 1st element'); + is('blob', $x[1], 'returns obj type in 2nd element'); + like($x[2], qr/\A\d+\z/, 'returns obj size in 3rd element'); + + my $raw = $gcf->cat_file($f); + is($x[2], length($$raw), 'length matches'); + + { + my $size; + my $rv = $gcf->cat_file($f, sub { + my ($in, $left) = @_; + $size = $$left; + 'nothing' + }); + is($rv, 'nothing', 'returned from callback without reading'); + is($size, $x[2], 'set size for callback correctly'); + } + + eval { $gcf->cat_file($f, sub { die 'OMG' }) }; + like($@, qr/\bOMG\b/, 'died in callback propagated'); + is(${$gcf->cat_file($f)}, $$raw, 'not broken after failures'); + + { + my ($buf, $r); + my $rv = $gcf->cat_file($f, sub { + my ($in, $left) = @_; + $r = read($in, $buf, 2); + $$left -= $r; + 'blah' + }); + is($r, 2, 'only read 2 bytes'); + is($buf, '--', 'partial read succeeded'); + is($rv, 'blah', 'return value propagated'); + } + is(${$gcf->cat_file($f)}, $$raw, 'not broken after partial read'); +} + +if (1) { + use POSIX qw(dup2); + my @cmd = ('git', "--git-dir=$dir", qw(hash-object -w --stdin)); + + # need a big file, use the AGPL-3.0 :p + my $big_data = getcwd().'/COPYING'; + ok(-r $big_data, 'COPYING readable'); + my $size = -s $big_data; + ok($size > 8192, 'file is big enough'); + + my ($r, $w); + ok(pipe($r, $w), 'created pipe'); + + my $pid = fork; + defined $pid or die "fork failed: $!\n"; + if ($pid == 0) { + close $r; + open STDIN, '<', $big_data or die "open $big_data: $!\n"; + dup2(fileno($w), 1); + exec @cmd; + die "failed exec: ",join(' ', @cmd),": $!\n"; + } + close $w; + my $n = read $r, my $buf, 41; + waitpid $pid, 0; + is(0, $?, 'hashed object successfully'); + chomp $buf; + + my $gcf = PublicInbox::GitCatFile->new($dir); + my $rsize; + is($gcf->cat_file($buf, sub { + $rsize = ${$_[1]}; + 'x'; + }), 'x', 'checked input'); + is($rsize, $size, 'got correct size on big file'); + + my $x = $gcf->cat_file($buf, \$rsize); + is($rsize, $size, 'got correct size ref on big file'); + is(length($$x), $size, 'read correct number of bytes'); + + my $rline; + $gcf->cat_file($buf, sub { + my ($in, $left) = @_; + $rline = <$in>; + $$left -= length($rline); + }); + { + open my $fh, '<', $big_data or die "open failed: $!\n"; + is($rline, <$fh>, 'first line matches'); + }; + + my $all; + $gcf->cat_file($buf, sub { + my ($in, $left) = @_; + my $x = read($in, $all, $$left); + $$left -= $x; + }); + { + open my $fh, '<', $big_data or die "open failed: $!\n"; + local $/; + is($all, <$fh>, 'entire read matches'); + }; +} + +done_testing(); -- cgit v1.2.3-24-ge0c7