From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 68F0F1F934 for ; Wed, 5 May 2021 10:46:39 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/2] lei rediff: regenerate diffs from stdin Date: Wed, 5 May 2021 10:46:37 +0000 Message-Id: <20210505104638.68435-2-e@80x24.org> In-Reply-To: <20210505104638.68435-1-e@80x24.org> References: <20210505104638.68435-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Sometimes a mailed patch is generated with non-ideal output (lacking context, noisy whitespace changes, etc.), or a user wants to use the same external diff viewer they've configured git to use. Since we have SolverGit to regenerate arbitrary blobs from patches, this new command allows us to regenerate a diff with different options using the blobs SolverGit gives us. The number of git-diff(1) options is mind-numbing, so it's likely I missed some favorites or botched the getopt spec translation. This also fixes Inbox::base_url to check psgi.url_scheme before attempting to generate URLs and avoid uninitialized variable warnings. Oddly, the "lei blob" tests did not trigger these uninitialized warnings. Note: this will automatically import+index the message(s) it's regenerating, because solver relies on being able to look up pre/postimage OIDs and read blobs. 
--- MANIFEST | 1 + lib/PublicInbox/Inbox.pm | 2 +- lib/PublicInbox/LEI.pm | 22 ++++ lib/PublicInbox/LeiInput.pm | 6 + lib/PublicInbox/LeiRediff.pm | 245 +++++++++++++++++++++++++++++++++++ t/solver_git.t | 8 +- 6 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 lib/PublicInbox/LeiRediff.pm diff --git a/MANIFEST b/MANIFEST index b40147b0..7be07aa5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -211,6 +211,7 @@ lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiP2q.pm lib/PublicInbox/LeiQuery.pm +lib/PublicInbox/LeiRediff.pm lib/PublicInbox/LeiRemote.pm lib/PublicInbox/LeiSavedSearch.pm lib/PublicInbox/LeiSearch.pm diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index da7ea75f..b94ffdb0 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -241,7 +241,7 @@ sub cloneurl { sub base_url { my ($self, $env) = @_; # env - PSGI env - if ($env) { + if ($env && $env->{'psgi.url_scheme'}) { my $url = PublicInbox::Git::host_prefix_url($env, ''); # for mount in Plack::Builder $url .= '/' if $url !~ m!/\z!; diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index c5fdfeb8..9dbbeba9 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -135,6 +135,23 @@ my @lxs_opt = (qw(remote! local! external! include|I=s@ exclude=s@ only=s@ import-remote! no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt()); +# we don't support -C as an alias for --find-copies since it's already +# used for chdir +our @diff_opt = qw(unified|U=i output-indicator-new=s output-indicator-old=s + output-indicator-context=s indent-heuristic! + minimal patience histogram anchored=s@ diff-algorithm=s + color-moved=s color-moved-ws=s no-color-moved no-color-moved-ws + word-diff:s word-diff-regex=s color-words:s no-renames + rename-empty! 
check ws-error-highlight=s full-index binary + abbrev:i break-rewrites|B:s find-renames|M:s find-copies:s + find-copies-harder irreversible-delete|D l=i diff-filter=s + S=s G=s find-object=s pickaxe-all pickaxe-regex O=s R + relative:s text|a ignore-cr-at-eol ignore-space-at-eol + ignore-space-change|b ignore-all-space|w ignore-blank-lines + inter-hunk-context=i function-context|W exit-code ext-diff + no-ext-diff textconv! src-prefix=s dst-prefix=s no-prefix + line-prefix=s); + # we generate shell completion + help using %CMD and %OPTDESC, # see lei__complete() and PublicInbox::LeiHelp # command => [ positional_args, 1-line description, Getopt::Long option spec ] @@ -162,6 +179,11 @@ our %CMD = ( # sorted in order of importance/use: qw(git-dir=s@ cwd! verbose|v+ mail! oid-a|A=s path-a|a=s path-b|b=s), @lxs_opt, @c_opt ], +'rediff' => [ '[--stdin|LOCATION...]', + 'regenerate a diff with different options', + qw(git-dir=s@ cwd! verbose|v+ color:s no-color), + @diff_opt, @lxs_opt, @c_opt ], + 'add-external' => [ 'LOCATION', 'add/set priority of a publicinbox|extindex for extra matches', qw(boost=i mirror=s no-torsocks torsocks=s inbox-version=i diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 46eea111..87083564 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -69,6 +69,12 @@ error reading $name: $! # but no Content-Length or "From " escaping. # "git format-patch" also generates such files by default. 
$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + + # a user may feed just a body: git diff | lei rediff -U9 + if ($self->{-force_eml}) { + my $eml = PublicInbox::Eml->new($buf); + substr($buf, 0, 0) = "\n\n" if !$eml->{bdy}; + } $self->input_eml_cb(PublicInbox::Eml->new(\$buf), @args); } else { # prepare_inputs already validated $ifmt diff --git a/lib/PublicInbox/LeiRediff.pm b/lib/PublicInbox/LeiRediff.pm new file mode 100644 index 00000000..6c734bef --- /dev/null +++ b/lib/PublicInbox/LeiRediff.pm @@ -0,0 +1,245 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# The "lei rediff" sub-command, regenerates diffs with new options +package PublicInbox::LeiRediff; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC PublicInbox::LeiInput); +use File::Temp 0.19 (); # 0.19 for ->newdir +use PublicInbox::Spawn qw(spawn which); +use PublicInbox::MsgIter qw(msg_part_text); +use PublicInbox::ViewDiff; +use PublicInbox::LeiBlob; +use PublicInbox::Git qw(git_quote git_unquote); +use PublicInbox::Import; +use PublicInbox::LEI; +use PublicInbox::SolverGit; + +sub rediff_user_cb { # called by solver when done + my ($res, $self) = @_; + my $lei = $self->{lei}; + my $log_buf = delete $lei->{log_buf}; + $$log_buf =~ s/^/# /sgm; + ref($res) eq 'ARRAY' or return $lei->child_error(1 << 8, $$log_buf); + $lei->qerr($$log_buf); + my ($git, $oid, $type, $size, $di) = @$res; + my $oid_want = delete $self->{cur_oid_want}; + + # don't try to support all the git-show(1) options for non-blob, + # this is just a convenience: + $type ne 'blob' and return $lei->err(<{git_dir} (wanted: $oid_want) +EOF + $self->{blob}->{$oid_want} = $oid; + push @{$self->{gits}}, $git if $git->{-tmp}; +} + +# returns a full blob for oid_want +sub solve_1 ($$$) { + my ($self, $oid_want, $hints) = @_; + return if $oid_want =~ /\A0+\z/; + $self->{cur_oid_want} = $oid_want; + my $solver = bless { + gits => $self->{gits}, + user_cb => \&rediff_user_cb, + uarg => $self, + inboxes => [ 
$self->{lxs}->locals, @{$self->{rmt}} ], + }, 'PublicInbox::SolverGit'; + open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!"; + $self->{lei}->{log_buf} = \$log_buf; + local $PublicInbox::DS::in_loop = 0; # waitpid synchronously + $solver->solve($self->{lei}->{env}, $log, $oid_want, $hints); + $self->{blob}->{$oid_want}; # full OID +} + +sub diff_ctxq ($$) { + my ($self, $ctxq) = @_; + return unless $ctxq; + my $blob = $self->{blob}; + my $ta = <<'EOM'; +reset refs/heads/A +commit refs/heads/A +author 0 +0000 +committer 0 +0000 +data 0 +EOM + my $tb = $ta; + $tb =~ tr!A!B!; + my $lei = $self->{lei}; + my $wait = delete($self->{-do_done}) ? $lei->{sto}->ipc_do('done') : 0; + while (my ($oid_a, $oid_b, $pa, $pb) = splice(@$ctxq, 0, 4)) { + my $xa = $blob->{$oid_a} //= solve_1($self, $oid_a, + { path_b => $pa }); + my $xb = $blob->{$oid_b} //= solve_1($self, $oid_b, { + oid_a => $oid_a, + path_a => $pa, + path_b => $pb + }); + $ta .= "M 100644 $xa ".git_quote($pa)."\n" if $xa; + $tb .= "M 100644 $xb ".git_quote($pb)."\n" if $xb; + } + my $rw = $self->{gits}->[-1]; # has all known alternates + if (!$rw->{-tmp}) { + my $d = "$self->{rdtmp}/for_tree.git"; + -d $d or PublicInbox::Import::init_bare($d); + my $f = "$d/objects/info/alternates"; # always overwrite + open my $fh, '>', $f or die "open $f: $!"; + for my $git (@{$self->{gits}}) { + print $fh $git->git_path('objects'),"\n"; + } + close $fh or die "close $f: $!"; + $rw = PublicInbox::Git->new($d); + } + pipe(my ($r, $w)) or die "pipe: $!"; + my $pid = spawn(['git', "--git-dir=$rw->{git_dir}", + qw(fast-import --quiet --done --date-format=raw)], + $lei->{env}, { 2 => $lei->{2}, 0 => $r }); + close $r or die "close r fast-import: $!"; + print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!"; + close $w or die "close w fast-import: $!"; + waitpid($pid, 0); + die "fast-import failed: \$?=$?" 
if $?; + + my @cmd = qw(diff); + my $opt = $lei->{opt}; + push @cmd, '--'.($opt->{color} && !$opt->{'no-color'} ? '' : 'no-'). + 'color'; + for my $o (@PublicInbox::LEI::diff_opt) { + $o =~ s/\|([a-z0-9])\b//i; # remove single char short option + my $c = $1; + if ($o =~ s/=[is]@\z//) { + my $v = $opt->{$o} or next; + push @cmd, map { $c ? "-$c$_" : "--$o=$_" } @$v; + } elsif ($o =~ s/=[is]\z//) { + my $v = $opt->{$o} // next; + push @cmd, $c ? "-$c$v" : "--$o=$v"; + } elsif ($o =~ s/:[is]\z//) { + my $v = $opt->{$o} // next; + push @cmd, $c ? "-$c$v" : + ($v eq '' ? "--$o" : "--$o=$v"); + } elsif ($o =~ s/!\z//) { + my $v = $opt->{$o} // next; + push @cmd, $v ? "--$o" : "--no-$o"; + } elsif ($opt->{$o}) { + push @cmd, $c ? "-$c" : "--$o"; + } + } + $lei->qerr("# git @cmd"); + push @cmd, qw(A B); + unshift @cmd, 'git', "--git-dir=$rw->{git_dir}"; + $pid = spawn(\@cmd, $lei->{env}, { 2 => $lei->{2}, 1 => $lei->{1} }); + waitpid($pid, 0); + $lei->child_error($?) if $?; # for git diff --exit-code +} + +sub extract_oids { # Eml each_part callback + my ($ary, $self) = @_; + my ($p, undef, $idx) = @$ary; + $self->{lei}->out($p->header_obj->as_string, "\n"); + my ($s, undef) = msg_part_text($p, $p->content_type || 'text/plain'); + defined $s or return; + my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $s); + undef $s; + my $blobs = $self->{blobs}; # blobs to resolve + my $ctxq; + while (defined(my $x = shift @top)) { + if (scalar(@top) >= 4 && + $top[1] =~ $PublicInbox::ViewDiff::IS_OID && + $top[0] =~ $PublicInbox::ViewDiff::IS_OID) { + my ($oid_a, $oid_b, $pa, $pb) = splice(@top, 0, 4); + $pa eq '/dev/null' or + $pa = (split(m'/', git_unquote($pa), 2))[1]; + $pb eq '/dev/null' or + $pb = (split(m'/', git_unquote($pb), 2))[1]; + $blobs->{$oid_a} //= undef; + $blobs->{$oid_b} //= undef; + push @$ctxq, $oid_a, $oid_b, $pa, $pb; + } elsif ($ctxq) { + my @out; + for (split(/^/sm, $x)) { + if (/\A-- \r?\n/s) { # email sig starts + push @out, $_; + $ctxq = 
diff_ctxq($self, $ctxq); + } elsif ($ctxq && (/\A[\+\- ]/ || /\A@@ / || + # allow totally blank lines w/o leading + # SP, git-apply does: + /\A\r?\n/s)) { + next; + } else { + push @out, $_; + } + } + $self->{lei}->out(@out) if @out; + } else { + $ctxq = diff_ctxq($self, $ctxq); + $self->{lei}->out($x); + } + } + $ctxq = diff_ctxq($self, $ctxq); +} + +sub input_eml_cb { # callback for all emails + my ($self, $eml) = @_; + $self->{lei}->{sto}->ipc_do('add_eml', $eml); + $self->{-do_done} = 1; + $eml->each_part(\&extract_oids, $self, 1); +} + +sub lei_rediff { + my ($lei, @inputs) = @_; + $lei->_lei_store(1)->write_prepare($lei); + $lei->{opt}->{stdin} = 1 if !@inputs; + $lei->{opt}->{'in-format'} //= 'eml'; + # maybe it's a non-email (code) blob from a coderepo + my $git_dirs = $lei->{opt}->{'git-dir'} //= []; + if ($lei->{opt}->{cwd} // 1) { + my $cgd = PublicInbox::LeiBlob::get_git_dir($lei, '.'); + unshift(@$git_dirs, $cgd) if defined $cgd; + } + return $lei->fail('no --git-dir to try') unless @$git_dirs; + my $lxs = $lei->lxs_prepare; + if ($lxs->remotes) { + require PublicInbox::LeiRemote; + $lei->{curl} //= which('curl') or return + $lei->fail('curl needed for', $lxs->remotes); + } + $lei->ale->refresh_externals($lxs); + my $self = bless { + -force_eml => 1, # for LeiInput->input_fh + lxs => $lxs, + }, __PACKAGE__; + $self->prepare_inputs($lei, \@inputs) or return; + my $isatty = -t $lei->{1}; + $lei->{opt}->{color} //= $isatty; + $lei->start_pager if $isatty; + my ($op_c, $ops) = $lei->workers_start($self, 1); + $lei->{wq1} = $self; + net_merge_all_done($self) unless $lei->{auth}; + $op_c->op_wait_event($ops); +} + +sub ipc_atfork_child { + my ($self) = @_; + PublicInbox::LeiInput::input_only_atfork_child(@_); + my $lei = $self->{lei}; + $lei->{1}->autoflush(1); + binmode $lei->{1}, ':utf8'; + $self->{blobs} = {}; # oidhex => filename + $self->{rdtmp} = File::Temp->newdir('lei-rediff-XXXX', TMPDIR => 1); + $self->{rmt} = [ map { + 
PublicInbox::LeiRemote->new($lei, $_) + } $self->{lxs}->remotes ]; + $self->{gits} = [ map { + PublicInbox::Git->new($lei->rel2abs($_)) + } @{$self->{lei}->{opt}->{'git-dir'}} ]; + $lei->{env}->{'psgi.errors'} = $lei->{2}; # ugh... + $lei->{env}->{TMPDIR} = $self->{rdtmp}->dirname; + undef; +} + +no warnings 'once'; +*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; +*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all; +1; diff --git a/t/solver_git.t b/t/solver_git.t index 75387b2a..e566efb3 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -32,7 +32,7 @@ my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; my $non_existent = 'ee5e32211bf62ab6531bdf39b84b6920d0b6775a'; -test_lei({tmpdir => $tmpdir}, sub { +test_lei({tmpdir => "$tmpdir/blob"}, sub { lei_ok('blob', '--mail', $patch2_oid, '-I', $ibx->{inboxdir}, \'--mail works for existing oid'); is($lei_out, $patch2->as_string, 'blob matches'); @@ -64,6 +64,12 @@ test_lei({tmpdir => $tmpdir}, sub { lei_ok('blob', $v1_0_0_tag_short, '-I', $ibx->{inboxdir}); }); +test_lei({tmpdir => "$tmpdir/rediff"}, sub { + lei_ok(qw(rediff -q -U9 t/solve/0001-simple-mod.patch)); + like($lei_out, qr!^\Q+++\E b/TODO\n@@ -103,9 \+103,11 @@!sm, + 'got more context with -U9'); +}); + my $git = PublicInbox::Git->new($git_dir); $ibx->{-repo_objs} = [ $git ]; my $res;