From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8352721421 for ; Mon, 21 Jan 2019 20:52:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/37] solver: various bugfixes and cleanups Date: Mon, 21 Jan 2019 20:52:24 +0000 Message-Id: <20190121205253.10455-9-e@80x24.org> In-Reply-To: <20190121205253.10455-1-e@80x24.org> References: <20190121205253.10455-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Remove the make_path dependency and call mkdir directly. Capture mode on new files, avoid referencing non-existent functions and enhance the debug output for users to read. --- lib/PublicInbox/SolverGit.pm | 87 ++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index f28768a..d7209e6 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -12,8 +12,7 @@ use strict; use warnings; use File::Temp qw(); use Fcntl qw(SEEK_SET); -use File::Path qw(make_path); -use PublicInbox::Git qw(git_unquote); +use PublicInbox::Git qw(git_unquote git_quote); use PublicInbox::Spawn qw(spawn popen_rd); use PublicInbox::MsgIter qw(msg_iter msg_part_text); use URI::Escape qw(uri_escape_utf8); @@ -31,15 +30,31 @@ sub new { } # look for existing blobs already in git repos -sub solve_existing ($$) { - my ($self, $want) = @_; +sub solve_existing ($$$) { + my ($self, $out, $want) = @_; + my $oid_b = $want->{oid_b}; + my @ambiguous; # Array of [ git, $oids] foreach my $git (@{$self->{gits}}) { - my ($oid_full, $type, $size) = $git->check($want->{oid_b}); + my ($oid_full, $type, $size) = $git->check($oid_b); if (defined($type) && $type eq 'blob') { return [ $git, $oid_full, $type, int($size) ]; } + + next if length($oid_b) == 40; + + # parse stderr of "git cat-file --batch-check" + my $err = $git->last_check_err; + my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g); + next unless scalar(@oids); + + # TODO: do something with the ambiguous array? + # push @ambiguous, [ $git, @oids ]; + + print $out "`$oid_b' ambiguous in ", + join("\n", $git->pub_urls), "\n", + join('', map { "$_ blob\n" } @oids), "\n"; } - undef; + scalar(@ambiguous) ? \@ambiguous : undef; } # returns a hashref with information about a diff: @@ -64,19 +79,22 @@ sub extract_diff ($$$$) { defined $s or return; my $di = {}; foreach my $l (split(/^/m, $s)) { - if ($l =~ /$re/) { + if ($l =~ $re) { $di->{oid_a} = $1; $di->{oid_b} = $2; - my $mode_a = $3; - if ($mode_a =~ /\A(?:100644|120000|100755)\z/) { - $di->{mode_a} = $mode_a; + if (defined($3)) { + my $mode_a = $3; + if ($mode_a =~ /\A(?:100644|120000|100755)\z/) { + $di->{mode_a} = $mode_a; + } } # start writing the diff out to a tempfile open($tmp, '+>', undef) or die "open(tmp): $!"; $di->{tmp} = $tmp; - $di->{hdr_lines} = $hdr_lines; + push @$hdr_lines, $l; + $di->{hdr_lines} = $hdr_lines; print $tmp @$hdr_lines, $l or die "print(tmp): $!"; # for debugging/diagnostics: @@ -103,6 +121,9 @@ sub extract_diff ($$$$) { print $tmp $l or die "print(tmp): $!"; } elsif ($hdr_lines) { push @$hdr_lines, $l; + if ($l =~ /\Anew file mode (100644|120000|100755)$/) { + $di->{mode_a} = $1; + } } } $tmp ? $di : undef; @@ -154,8 +175,8 @@ sub do_git_init_wt ($) { my $wt = File::Temp->newdir('solver.wt-XXXXXXXX', TMPDIR => 1); my $dir = $wt->dirname; - foreach (qw(objects/info refs/heads)) { - make_path("$dir/.git/$_") or die "make_path $_: $!"; + foreach ('', qw(objects refs objects/info refs/heads)) { + mkdir("$dir/.git/$_") or die "mkdir $_: $!"; } open my $fh, '>', "$dir/.git/config" or die "open .git/config: $!"; print $fh <<'EOF' or die "print .git/config $!"; @@ -174,9 +195,8 @@ EOF my $f = '.git/objects/info/alternates'; open $fh, '>', "$dir/$f" or die "open: $f: $!"; - foreach my $git (@{$self->{gits}}) { - print $fh "$git->{git_dir}/objects\n" or die "print $f: $!"; - } + print($fh (map { "$_->{git_dir}/objects\n" } @{$self->{gits}})) or + die "print $f: $!"; close $fh or die "close: $f: $!"; $wt; } @@ -195,8 +215,8 @@ sub reap ($$) { $? == 0 or die "$msg failed: $?"; } -sub prepare_wt ($$$) { - my ($wt_dir, $existing, $di) = @_; +sub prepare_wt ($$$$) { + my ($out, $wt_dir, $existing, $di) = @_; my $oid_full = $existing->[1]; my ($r, $w); my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full"; @@ -208,17 +228,21 @@ sub prepare_wt ($$$) { my $pid = spawn([@git, qw(update-index -z --index-info)], {}, $rdr); close $r or die "close pipe(r): $!"; print $w "$mode_a $oid_full\t$path_a\0" or die "print update-index: $!"; + close $w or die "close update-index: $!"; reap($pid, 'update-index -z --index-info'); $pid = spawn([@git, qw(checkout-index -a -f -u)]); reap($pid, 'checkout-index -a -f -u'); + + print $out "Working tree prepared:\n", + "$mode_a $oid_full\t", git_quote($path_a), "\n"; } sub do_apply ($$$$) { my ($out, $wt_git, $wt_dir, $di) = @_; - my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_info($di); + my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_url($di); $tmp->flush or die "tmp->flush failed: $!"; $out->flush or die "err->flush failed: $!"; sysseek($tmp, 0, SEEK_SET) or die "sysseek(tmp) failed: $!"; @@ -257,7 +281,7 @@ sub di_url ($) { # can have different HTTP_HOST on the same instance. my $url = $di->{ibx}->base_url; my $mid = $di->{smsg}->{mid}; - defined($url) ? "<$url/$mid/>" : "<$mid>"; + defined($url) ? "<$url$mid/>" : "<$mid>"; } sub apply_patches ($$$$$) { @@ -275,7 +299,7 @@ sub apply_patches ($$$$$) { my $existing = $found->{$oid_a}; my $empty_oid = $oid_a =~ /\A0+\z/; - if ($empty_oid && $i != 0) { + if ($empty_oid && $i != 1) { die "empty oid at [$i/$tot] ", di_url($di); } if (!$existing && !$empty_oid) { @@ -284,13 +308,13 @@ sub apply_patches ($$$$$) { # prepare the worktree for patch application: if ($i == 1 && $existing) { - prepare_wt($wt_dir, $existing, $di); + prepare_wt($out, $wt_dir, $existing, $di); } - unless (-f "$wt_dir/$di->{path_a}") { + if (!$empty_oid && ! -f "$wt_dir/$di->{path_a}") { die "missing $di->{path_a} at [$i/$tot] ", di_url($di); } - print $out "applying [$i/$tot] ", di_url($di), "\n", + print $out "\napplying [$i/$tot] ", di_url($di), "\n", join('', @{$di->{hdr_lines}}), "\n" or die "print \$out failed: $!"; @@ -302,8 +326,8 @@ sub apply_patches ($$$$$) { sub dump_found ($$) { my ($out, $found) = @_; foreach my $oid (sort keys %$found) { - my ($git, $oid, $di) = @{$found->{$oid}}; - my $loc = $di ? di_info($di) : $git->src_blob_url($oid); + my ($git, $oid, undef, undef, $di) = @{$found->{$oid}}; + my $loc = $di ? di_url($di) : $git->src_blob_url($oid); print $out "$oid from $loc\n"; } } @@ -330,7 +354,7 @@ sub solve ($$$$) { my $req = { %$hints, oid_b => $oid_b }; my @todo = ($req); - my $found = {}; # { oid_abbrev => [ PublicInbox::Git, oid_full, $di ] } + my $found = {}; # { abbrev => [ ::Git, oid_full, type, size, $di ] } my $patches = []; # [ array of $di hashes ] my $max = $self->{max_steps} || 200; @@ -338,9 +362,14 @@ sub solve ($$$$) { while (defined(my $want = pop @todo)) { # see if we can find the blob in an existing git repo: - if (my $existing = solve_existing($self, $want)) { + if (my $existing = solve_existing($self, $out, $want)) { my $want_oid = $want->{oid_b}; - return $existing if $want_oid eq $oid_b; # DONE! + if ($want_oid eq $oid_b) { # DONE! + my @pub_urls = $existing->[0]->pub_urls; + print $out "found $want_oid in ", + join("\n", @pub_urls),"\n"; + return $existing; + } $found->{$want_oid} = $existing; next; # ok, one blob resolved, more to go? -- EW