about summary refs log tree commit homepage
path: root/lib/PublicInbox/SolverGit.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/SolverGit.pm')
-rw-r--r--lib/PublicInbox/SolverGit.pm150
1 files changed, 72 insertions, 78 deletions
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index d3567aa2..296e7d17 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -11,13 +11,16 @@ package PublicInbox::SolverGit;
 use strict;
 use v5.10.1;
 use File::Temp 0.19 (); # 0.19 for ->newdir
+use autodie qw(mkdir);
 use Fcntl qw(SEEK_SET);
 use PublicInbox::Git qw(git_unquote git_quote);
+use PublicInbox::IO qw(write_file);
 use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Qspawn;
 use PublicInbox::Tmpfile;
 use PublicInbox::GitAsyncCat;
 use PublicInbox::Eml;
+use PublicInbox::Compat qw(uniqstr);
 use URI::Escape qw(uri_escape_utf8);
 
 # POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
@@ -79,11 +82,13 @@ sub solve_existing ($$) {
         my $try = $want->{try_gits} //= [ @{$self->{gits}} ]; # array copy
         my $git = shift @$try or die 'BUG {try_gits} empty';
         my $oid_b = $want->{oid_b};
+
+        # can't use async_check due to last_check_err :<
         my ($oid_full, $type, $size) = $git->check($oid_b);
+        $git->schedule_cleanup if $self->{psgi_env}->{'pi-httpd.async'};
 
-        # other than {oid_b, try_gits, try_ibxs}
-        my $have_hints = scalar keys %$want > 3;
-        if (defined($type) && (!$have_hints || $type eq 'blob')) {
+        if ($oid_b eq ($oid_full // '') || (defined($type) &&
+                                (!$self->{have_hints} || $type eq 'blob'))) {
                 delete $want->{try_gits};
                 return [ $git, $oid_full, $type, int($size) ]; # done, success
         }
@@ -106,6 +111,11 @@ sub solve_existing ($$) {
         scalar(@$try);
 }
 
+sub _tmp {
+        $_[0]->{tmp} //=
+                File::Temp->newdir("solver.$_[0]->{oid_want}-XXXX", TMPDIR => 1);
+}
+
 sub extract_diff ($$) {
         my ($p, $arg) = @_;
         my ($self, $want, $smsg) = @$arg;
@@ -193,10 +203,8 @@ sub extract_diff ($$) {
 
         my $path = ++$self->{tot};
         $di->{n} = $path;
-        open(my $tmp, '>:utf8', $self->{tmp}->dirname . "/$path") or
-                die "open(tmp): $!";
-        print $tmp $di->{hdr_lines}, $patch or die "print(tmp): $!";
-        close $tmp or die "close(tmp): $!";
+        my $f = _tmp($self)->dirname."/$path";
+        write_file '>:utf8', $f, $di->{hdr_lines}, $patch;
 
         # for debugging/diagnostics:
         $di->{ibx} = $want->{cur_ibx};
@@ -242,14 +250,18 @@ sub find_smsgs ($$$) {
 
 sub update_index_result ($$) {
         my ($bref, $self) = @_;
-        my ($qsp, $msg) = delete @$self{qw(-qsp -msg)};
-        if (my $err = $qsp->{err}) {
-                ERR($self, "git update-index error: $err");
-        }
+        my ($qsp_err, $msg) = delete @$self{qw(-qsp_err -msg)};
+        ERR($self, "git update-index error:$qsp_err") if $qsp_err;
         dbg($self, $msg);
         next_step($self); # onto do_git_apply
 }
 
+sub qsp_qx ($$$) {
+        my ($self, $qsp, $cb) = @_;
+        $qsp->{qsp_err} = \($self->{-qsp_err} = '');
+        $qsp->psgi_qx($self->{psgi_env}, $self->{limiter}, $cb, $self);
+}
+
 sub prepare_index ($) {
         my ($self) = @_;
         my $patches = $self->{patches};
@@ -278,47 +290,33 @@ sub prepare_index ($) {
         my $cmd = [ qw(git update-index -z --index-info) ];
         my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
         $path_a = git_quote($path_a);
-        $self->{-qsp} = $qsp;
         $self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a";
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self);
+        qsp_qx $self, $qsp, \&update_index_result;
 }
 
 # pure Perl "git init"
 sub do_git_init ($) {
         my ($self) = @_;
-        my $dir = $self->{tmp}->dirname;
-        my $git_dir = "$dir/git";
+        my $git_dir = _tmp($self)->dirname.'/git';
 
-        foreach ('', qw(objects refs objects/info refs/heads)) {
-                mkdir("$git_dir/$_") or die "mkdir $_: $!";
-        }
-        open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
+        mkdir("$git_dir/$_") for ('', qw(objects refs objects/info refs/heads));
         my $first = $self->{gits}->[0];
         my $fmt = $first->object_format;
-        my $v = defined($$fmt) ? 1 : 0;
-        print $fh <<EOF or die "print git/config $!";
+        my ($v, @ext) = defined($$fmt) ? (1, <<EOM) : (0);
+[extensions]
+        objectformat = $$fmt
+EOM
+        write_file '>', "$git_dir/config", <<EOF, @ext;
 [core]
         repositoryFormatVersion = $v
         filemode = true
         bare = false
         logAllRefUpdates = false
 EOF
-        print $fh <<EOM if defined($$fmt);
-[extensions]
-        objectformat = $$fmt
-EOM
-        close $fh or die "close git/config: $!";
-
-        open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!";
-        print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
-        close $fh or die "close git/HEAD: $!";
-
-        my $f = 'objects/info/alternates';
-        open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
-        foreach my $git (@{$self->{gits}}) {
-                print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
-        }
-        close $fh or die "close: $f: $!";
+        write_file '>', "$git_dir/HEAD", "ref: refs/heads/master\n";
+        write_file '>', "$git_dir/objects/info/alternates", map {
+                        $_->git_path('objects')."\n"
+                } @{$self->{gits}};
         my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
         $tmp_git->{-tmp} = $self->{tmp};
         $self->{git_env} = {
@@ -384,12 +382,9 @@ sub event_step ($) {
 }
 
 sub next_step ($) {
-        my ($self) = @_;
         # if outside of public-inbox-httpd, caller is expected to be
         # looping event_step, anyways
-        my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
-        # PublicInbox::HTTPD::Async->new
-        $async->(undef, undef, $self);
+        PublicInbox::DS::requeue($_[0]) if $_[0]->{psgi_env}->{'pi-httpd.async'}
 }
 
 sub mark_found ($$$) {
@@ -405,21 +400,18 @@ sub mark_found ($$$) {
 
 sub parse_ls_files ($$) {
         my ($self, $bref) = @_;
-        my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
-        if (my $err = $qsp->{err}) {
-                die "git ls-files error: $err";
-        }
+        my ($qsp_err, $di) = delete @$self{qw(-qsp_err -cur_di)};
+        die "git ls-files -s -z error:$qsp_err" if $qsp_err;
 
-        my ($line, @extra) = split(/\0/, $$bref);
+        my @ls = split(/\0/, $$bref);
+        my ($line, @extra) = grep(/\t\Q$di->{path_b}\E\z/, @ls);
         scalar(@extra) and die "BUG: extra files in index: <",
-                                join('> <', @extra), ">";
-
+                                join('> <', $line, @extra), ">";
+        $line // die "no \Q$di->{path_b}\E in <",join('> <', @ls), '>';
         my ($info, $file) = split(/\t/, $line, 2);
         my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
-        if ($file ne $di->{path_b}) {
-                die
+        $file eq $di->{path_b} or die
 "BUG: index mismatch: file=$file != path_b=$di->{path_b}";
-        }
 
         my $tmp_git = $self->{tmp_git} or die 'no git working tree';
         my (undef, undef, $size) = $tmp_git->check($oid_b_full);
@@ -454,50 +446,49 @@ sub skip_identical ($$$) {
         }
 }
 
-sub apply_result ($$) {
+sub apply_result ($$) { # qx_cb
         my ($bref, $self) = @_;
-        my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
+        my ($qsp_err, $di) = delete @$self{qw(-qsp_err -cur_di)};
         dbg($self, $$bref);
         my $patches = $self->{patches};
-        if (my $err = $qsp->{err}) {
-                my $msg = "git apply error: $err";
+        if ($qsp_err) {
+                my $msg = "git apply error:$qsp_err";
                 my $nxt = $patches->[0];
                 if ($nxt && oids_same_ish($nxt->{oid_b}, $di->{oid_b})) {
                         dbg($self, $msg);
                         dbg($self, 'trying '.di_url($self, $nxt));
                         return do_git_apply($self);
                 } else {
-                        ERR($self, $msg);
+                        $msg .= " (no patches left to try for $di->{oid_b})\n";
+                        dbg($self, $msg);
+                        return done($self, undef);
                 }
         } else {
                 skip_identical($self, $patches, $di->{oid_b});
         }
 
         my @cmd = qw(git ls-files -s -z);
-        $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
+        my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
         $self->{-cur_di} = $di;
-        $self->{-qsp} = $qsp;
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self);
+        qsp_qx $self, $qsp, \&ls_files_result;
 }
 
 sub do_git_apply ($) {
         my ($self) = @_;
-        my $dn = $self->{tmp}->dirname;
         my $patches = $self->{patches};
 
         # we need --ignore-whitespace because some patches are CRLF
         my @cmd = (qw(git apply --cached --ignore-whitespace
                         --unidiff-zero --whitespace=warn --verbose));
         my $len = length(join(' ', @cmd));
-        my $total = $self->{tot};
         my $di; # keep track of the last one for "git ls-files"
         my $prv_oid_b;
 
         do {
                 my $i = ++$self->{nr};
                 $di = shift @$patches;
-                dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
-                        "\n" . $di->{hdr_lines});
+                dbg($self, "\napplying [$i/$self->{nr_p}] " .
+                        di_url($self, $di) . "\n" . $di->{hdr_lines});
                 my $path = $di->{n};
                 $len += length($path) + 1;
                 push @cmd, $path;
@@ -505,11 +496,10 @@ sub do_git_apply ($) {
         } while (@$patches && $len < $ARG_SIZE_MAX &&
                  !oids_same_ish($patches->[0]->{oid_b}, $prv_oid_b));
 
-        my $opt = { 2 => 1, -C => $dn, quiet => 1 };
+        my $opt = { 2 => 1, -C => _tmp($self)->dirname, quiet => 1 };
         my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt);
         $self->{-cur_di} = $di;
-        $self->{-qsp} = $qsp;
-        $qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self);
+        qsp_qx $self, $qsp, \&apply_result;
 }
 
 sub di_url ($$) {
@@ -558,8 +548,9 @@ sub extract_diffs_done {
         my $diffs = delete $self->{tmp_diffs};
         if (scalar @$diffs) {
                 unshift @{$self->{patches}}, @$diffs;
-                dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t",
-                        map { di_url($self, $_) } @$diffs));
+                my @u = uniqstr(map { di_url($self, $_) } @$diffs);
+                dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t", @u));
+                ++$self->{nr_p};
 
                 # good, we can find a path to the oid we $want, now
                 # lets see if we need to apply more patches:
@@ -641,7 +632,7 @@ sub resolve_patch ($$) {
 
         # scan through inboxes to look for emails which results in
         # the oid we want:
-        my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty';
+        my $ibx = shift(@{$want->{try_ibxs}}) or return done($self, undef);
         if (my $msgs = find_smsgs($self, $ibx, $want)) {
                 $want->{try_smsgs} = $msgs;
                 $want->{cur_ibx} = $ibx;
@@ -655,15 +646,19 @@ sub resolve_patch ($$) {
 # so user_cb never references the SolverGit object
 sub new {
         my ($class, $ibx, $user_cb, $uarg) = @_;
+        my $gits = $ibx ? $ibx->{-repo_objs} : undef;
+
+        # FIXME: cindex --join= is super-aggressive and may hit too many
+        $gits = [ @$gits[0..2] ] if $gits && @$gits > 3;
 
-        bless {
-                gits => $ibx->{-repo_objs},
+        bless { # $ibx is undef if coderepo only (see WwwCoderepo)
+                gits => $gits,
                 user_cb => $user_cb,
                 uarg => $uarg,
-                # -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks
+                # -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks
 
                 # TODO: config option for searching related inboxes
-                inboxes => [ $ibx ],
+                inboxes => $ibx ? [ $ibx ] : [],
         }, $class;
 }
 
@@ -682,17 +677,16 @@ sub solve ($$$$$) {
         $self->{oid_want} = $oid_want;
         $self->{out} = $out;
         $self->{seen_oid} = {};
-        $self->{tot} = 0;
+        $self->{tot} = $self->{nr_p} = 0;
         $self->{psgi_env} = $env;
+        $self->{have_hints} = 1 if scalar keys %$hints;
         $self->{todo} = [ { %$hints, oid_b => $oid_want } ];
         $self->{patches} = []; # [ $di, $di, ... ]
         $self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
-        $self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXX", TMPDIR => 1);
 
         dbg($self, "solving $oid_want ...");
-        if (my $async = $env->{'pi-httpd.async'}) {
-                # PublicInbox::HTTPD::Async->new
-                $async->(undef, undef, $self);
+        if ($env->{'pi-httpd.async'}) {
+                PublicInbox::DS::requeue($self);
         } else {
                 event_step($self) while $self->{user_cb};
         }