diff options
Diffstat (limited to 'lib/PublicInbox/SolverGit.pm')
-rw-r--r-- | lib/PublicInbox/SolverGit.pm | 150 |
1 files changed, 72 insertions, 78 deletions
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index d3567aa2..296e7d17 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -11,13 +11,16 @@ package PublicInbox::SolverGit; use strict; use v5.10.1; use File::Temp 0.19 (); # 0.19 for ->newdir +use autodie qw(mkdir); use Fcntl qw(SEEK_SET); use PublicInbox::Git qw(git_unquote git_quote); +use PublicInbox::IO qw(write_file); use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Qspawn; use PublicInbox::Tmpfile; use PublicInbox::GitAsyncCat; use PublicInbox::Eml; +use PublicInbox::Compat qw(uniqstr); use URI::Escape qw(uri_escape_utf8); # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to @@ -79,11 +82,13 @@ sub solve_existing ($$) { my $try = $want->{try_gits} //= [ @{$self->{gits}} ]; # array copy my $git = shift @$try or die 'BUG {try_gits} empty'; my $oid_b = $want->{oid_b}; + + # can't use async_check due to last_check_err :< my ($oid_full, $type, $size) = $git->check($oid_b); + $git->schedule_cleanup if $self->{psgi_env}->{'pi-httpd.async'}; - # other than {oid_b, try_gits, try_ibxs} - my $have_hints = scalar keys %$want > 3; - if (defined($type) && (!$have_hints || $type eq 'blob')) { + if ($oid_b eq ($oid_full // '') || (defined($type) && + (!$self->{have_hints} || $type eq 'blob'))) { delete $want->{try_gits}; return [ $git, $oid_full, $type, int($size) ]; # done, success } @@ -106,6 +111,11 @@ sub solve_existing ($$) { scalar(@$try); } +sub _tmp { + $_[0]->{tmp} //= + File::Temp->newdir("solver.$_[0]->{oid_want}-XXXX", TMPDIR => 1); +} + sub extract_diff ($$) { my ($p, $arg) = @_; my ($self, $want, $smsg) = @$arg; @@ -193,10 +203,8 @@ sub extract_diff ($$) { my $path = ++$self->{tot}; $di->{n} = $path; - open(my $tmp, '>:utf8', $self->{tmp}->dirname . "/$path") or - die "open(tmp): $!"; - print $tmp $di->{hdr_lines}, $patch or die "print(tmp): $!"; - close $tmp or die "close(tmp): $!"; + my $f = _tmp($self)->dirname."/$path"; + write_file '>:utf8', $f, $di->{hdr_lines}, $patch; # for debugging/diagnostics: $di->{ibx} = $want->{cur_ibx}; @@ -242,14 +250,18 @@ sub find_smsgs ($$$) { sub update_index_result ($$) { my ($bref, $self) = @_; - my ($qsp, $msg) = delete @$self{qw(-qsp -msg)}; - if (my $err = $qsp->{err}) { - ERR($self, "git update-index error: $err"); - } + my ($qsp_err, $msg) = delete @$self{qw(-qsp_err -msg)}; + ERR($self, "git update-index error:$qsp_err") if $qsp_err; dbg($self, $msg); next_step($self); # onto do_git_apply } +sub qsp_qx ($$$) { + my ($self, $qsp, $cb) = @_; + $qsp->{qsp_err} = \($self->{-qsp_err} = ''); + $qsp->psgi_qx($self->{psgi_env}, $self->{limiter}, $cb, $self); +} + sub prepare_index ($) { my ($self) = @_; my $patches = $self->{patches}; @@ -278,47 +290,33 @@ sub prepare_index ($) { my $cmd = [ qw(git update-index -z --index-info) ]; my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr); $path_a = git_quote($path_a); - $self->{-qsp} = $qsp; $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a"; - $qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self); + qsp_qx $self, $qsp, \&update_index_result; } # pure Perl "git init" sub do_git_init ($) { my ($self) = @_; - my $dir = $self->{tmp}->dirname; - my $git_dir = "$dir/git"; + my $git_dir = _tmp($self)->dirname.'/git'; - foreach ('', qw(objects refs objects/info refs/heads)) { - mkdir("$git_dir/$_") or die "mkdir $_: $!"; - } - open my $fh, '>', "$git_dir/config" or die "open git/config: $!"; + mkdir("$git_dir/$_") for ('', qw(objects refs objects/info refs/heads)); my $first = $self->{gits}->[0]; my $fmt = $first->object_format; - my $v = defined($$fmt) ? 1 : 0; - print $fh <<EOF or die "print git/config $!"; + my ($v, @ext) = defined($$fmt) ? (1, <<EOM) : (0); +[extensions] + objectformat = $$fmt +EOM + write_file '>', "$git_dir/config", <<EOF, @ext; [core] repositoryFormatVersion = $v filemode = true bare = false logAllRefUpdates = false EOF - print $fh <<EOM if defined($$fmt); -[extensions] - objectformat = $$fmt -EOM - close $fh or die "close git/config: $!"; - - open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!"; - print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!"; - close $fh or die "close git/HEAD: $!"; - - my $f = 'objects/info/alternates'; - open $fh, '>', "$git_dir/$f" or die "open: $f: $!"; - foreach my $git (@{$self->{gits}}) { - print $fh $git->git_path('objects'),"\n" or die "print $f: $!"; - } - close $fh or die "close: $f: $!"; + write_file '>', "$git_dir/HEAD", "ref: refs/heads/master\n"; + write_file '>', "$git_dir/objects/info/alternates", map { + $_->git_path('objects')."\n" + } @{$self->{gits}}; my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir); $tmp_git->{-tmp} = $self->{tmp}; $self->{git_env} = { @@ -384,12 +382,9 @@ sub event_step ($) { } sub next_step ($) { - my ($self) = @_; # if outside of public-inbox-httpd, caller is expected to be # looping event_step, anyways - my $async = $self->{psgi_env}->{'pi-httpd.async'} or return; - # PublicInbox::HTTPD::Async->new - $async->(undef, undef, $self); + PublicInbox::DS::requeue($_[0]) if $_[0]->{psgi_env}->{'pi-httpd.async'} } sub mark_found ($$$) { @@ -405,21 +400,18 @@ sub mark_found ($$$) { sub parse_ls_files ($$) { my ($self, $bref) = @_; - my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)}; - if (my $err = $qsp->{err}) { - die "git ls-files error: $err"; - } + my ($qsp_err, $di) = delete @$self{qw(-qsp_err -cur_di)}; + die "git ls-files -s -z error:$qsp_err" if $qsp_err; - my ($line, @extra) = split(/\0/, $$bref); + my @ls = split(/\0/, $$bref); + my ($line, @extra) = grep(/\t\Q$di->{path_b}\E\z/, @ls); scalar(@extra) and die "BUG: extra files in index: <", - join('> <', @extra), ">"; - + join('> <', $line, @extra), ">"; + $line // die "no \Q$di->{path_b}\E in <",join('> <', @ls), '>'; my ($info, $file) = split(/\t/, $line, 2); my ($mode_b, $oid_b_full, $stage) = split(/ /, $info); - if ($file ne $di->{path_b}) { - die + $file eq $di->{path_b} or die "BUG: index mismatch: file=$file != path_b=$di->{path_b}"; - } my $tmp_git = $self->{tmp_git} or die 'no git working tree'; my (undef, undef, $size) = $tmp_git->check($oid_b_full); @@ -454,50 +446,49 @@ sub skip_identical ($$$) { } } -sub apply_result ($$) { +sub apply_result ($$) { # qx_cb my ($bref, $self) = @_; - my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)}; + my ($qsp_err, $di) = delete @$self{qw(-qsp_err -cur_di)}; dbg($self, $$bref); my $patches = $self->{patches}; - if (my $err = $qsp->{err}) { - my $msg = "git apply error: $err"; + if ($qsp_err) { + my $msg = "git apply error:$qsp_err"; my $nxt = $patches->[0]; if ($nxt && oids_same_ish($nxt->{oid_b}, $di->{oid_b})) { dbg($self, $msg); dbg($self, 'trying '.di_url($self, $nxt)); return do_git_apply($self); } else { - ERR($self, $msg); + $msg .= " (no patches left to try for $di->{oid_b})\n"; + dbg($self, $msg); + return done($self, undef); } } else { skip_identical($self, $patches, $di->{oid_b}); } my @cmd = qw(git ls-files -s -z); - $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}); + my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}); $self->{-cur_di} = $di; - $self->{-qsp} = $qsp; - $qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self); + qsp_qx $self, $qsp, \&ls_files_result; } sub do_git_apply ($) { my ($self) = @_; - my $dn = $self->{tmp}->dirname; my $patches = $self->{patches}; # we need --ignore-whitespace because some patches are CRLF my @cmd = (qw(git apply --cached --ignore-whitespace --unidiff-zero --whitespace=warn --verbose)); my $len = length(join(' ', @cmd)); - my $total = $self->{tot}; my $di; # keep track of the last one for "git ls-files" my $prv_oid_b; do { my $i = ++$self->{nr}; $di = shift @$patches; - dbg($self, "\napplying [$i/$total] " . di_url($self, $di) . - "\n" . $di->{hdr_lines}); + dbg($self, "\napplying [$i/$self->{nr_p}] " . + di_url($self, $di) . "\n" . $di->{hdr_lines}); my $path = $di->{n}; $len += length($path) + 1; push @cmd, $path; @@ -505,11 +496,10 @@ sub do_git_apply ($) { } while (@$patches && $len < $ARG_SIZE_MAX && !oids_same_ish($patches->[0]->{oid_b}, $prv_oid_b)); - my $opt = { 2 => 1, -C => $dn, quiet => 1 }; + my $opt = { 2 => 1, -C => _tmp($self)->dirname, quiet => 1 }; my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt); $self->{-cur_di} = $di; - $self->{-qsp} = $qsp; - $qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self); + qsp_qx $self, $qsp, \&apply_result; } sub di_url ($$) { @@ -558,8 +548,9 @@ sub extract_diffs_done { my $diffs = delete $self->{tmp_diffs}; if (scalar @$diffs) { unshift @{$self->{patches}}, @$diffs; - dbg($self, "found $want->{oid_b} in " . join(" ||\n\t", - map { di_url($self, $_) } @$diffs)); + my @u = uniqstr(map { di_url($self, $_) } @$diffs); + dbg($self, "found $want->{oid_b} in " . join(" ||\n\t", @u)); + ++$self->{nr_p}; # good, we can find a path to the oid we $want, now # lets see if we need to apply more patches: @@ -641,7 +632,7 @@ sub resolve_patch ($$) { # scan through inboxes to look for emails which results in # the oid we want: - my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty'; + my $ibx = shift(@{$want->{try_ibxs}}) or return done($self, undef); if (my $msgs = find_smsgs($self, $ibx, $want)) { $want->{try_smsgs} = $msgs; $want->{cur_ibx} = $ibx; @@ -655,15 +646,19 @@ sub resolve_patch ($$) { # so user_cb never references the SolverGit object sub new { my ($class, $ibx, $user_cb, $uarg) = @_; + my $gits = $ibx ? $ibx->{-repo_objs} : undef; + + # FIXME: cindex --join= is super-aggressive and may hit too many + $gits = [ @$gits[0..2] ] if $gits && @$gits > 3; - bless { - gits => $ibx->{-repo_objs}, + bless { # $ibx is undef if coderepo only (see WwwCoderepo) + gits => $gits, user_cb => $user_cb, uarg => $uarg, - # -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks + # -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks # TODO: config option for searching related inboxes - inboxes => [ $ibx ], + inboxes => $ibx ? [ $ibx ] : [], }, $class; } @@ -682,17 +677,16 @@ sub solve ($$$$$) { $self->{oid_want} = $oid_want; $self->{out} = $out; $self->{seen_oid} = {}; - $self->{tot} = 0; + $self->{tot} = $self->{nr_p} = 0; $self->{psgi_env} = $env; + $self->{have_hints} = 1 if scalar keys %$hints; $self->{todo} = [ { %$hints, oid_b => $oid_want } ]; $self->{patches} = []; # [ $di, $di, ... ] $self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] } - $self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXX", TMPDIR => 1); dbg($self, "solving $oid_want ..."); - if (my $async = $env->{'pi-httpd.async'}) { - # PublicInbox::HTTPD::Async->new - $async->(undef, undef, $self); + if ($env->{'pi-httpd.async'}) { + PublicInbox::DS::requeue($self); } else { event_step($self) while $self->{user_cb}; } |