# Copyright (C) 2016 all contributors
# License: AGPL-3.0+
# common git diff-related code
package PublicInbox::RepoGitDiffCommon;
use strict;
use warnings;
use PublicInbox::RepoGit qw/git_unquote git_commit_title/;
use PublicInbox::Hval qw/utf8_html to_attr/;
use base qw/Exporter/;
our @EXPORT = qw/git_diff_sed_init git_diff_sed_close git_diff_sed_run
DSTATE_INIT DSTATE_STAT DSTATE_LINES/;
# index abcdef89..01234567
# Build the "index" line of a regular (two-way) diff.
#   $xa, $xb - the two hex IDs captured from the line; emitted verbatim
#   $end     - whatever followed the second ID; passed through utf8_html()
# Returns the reassembled line without a trailing newline.
# No links are emitted here (yet); links in hunk headers are far more
# useful because they can carry line offsets.
sub git_diff_ab_index ($$$) {
    my ($xa, $xb, $end) = @_;
    my $tail = utf8_html($end);
    return 'index ' . $xa . '..' . $xb . $tail;
}
# diff --git a/foo.c b/bar.c
# Handle the "diff --git" header of a regular diff and remember the two
# paths in $req for the lines that follow.
#   $req - request hashref; anchors, fa, fb, path_a and path_b are written
#   $fa  - old-side name as it appeared on the line (may be quoted, "a/...")
#   $fb  - new-side name as it appeared on the line (may be quoted, "b/...")
# Returns the header line (no trailing newline) built from the names as
# they appeared, each passed through utf8_html().
# No links are emitted here; links in hunk headers are far more useful
# because they can carry line offsets.
sub git_diff_ab_hdr ($$$) {
    my ($req, $fa, $fb) = @_;

    # keep the displayed form before the names are unquoted below
    my $shown_a = utf8_html($fa);
    my $shown_b = utf8_html($fb);

    # unquote, then drop the a/ and b/ prefixes
    $fa = git_unquote($fa);
    $fb = git_unquote($fb);
    $fa =~ s!\Aa/!!;
    $fb =~ s!\Ab/!!;

    # this file now has a diff section: drop it from the pending anchors
    delete $req->{anchors}->{to_attr($fb)};

    my $hval_a = PublicInbox::Hval->utf8($fa);
    $req->{fa} = $hval_a;
    my $hval_b = PublicInbox::Hval->utf8($fb);
    $req->{fb} = $hval_b;
    $req->{path_a} = $hval_a->as_path;
    $req->{path_b} = $hval_b->as_path;

    return 'diff --git ' . $shown_a . ' ' . $shown_b;
}
# diff (--cc|--combined)
# Handle the header line of a combined diff and remember its path in $req.
#   $req      - request hashref; anchors, cc and path_cc are written
#   $combined - the word captured after "diff --" ("cc" or "combined")
#   $path     - the path as it appeared on the line (may be quoted)
# Returns the header line (no trailing newline) using the path as it
# appeared, passed through utf8_html().
sub git_diff_cc_hdr {
    my ($req, $combined, $path) = @_;

    # keep the displayed form before the path is unquoted
    my $shown = utf8_html($path);
    $path = git_unquote($path);

    # this file now has a diff section: drop it from the pending anchors
    delete $req->{anchors}->{to_attr($path)};

    my $hval = PublicInbox::Hval->utf8($path);
    $req->{cc} = $hval;
    $req->{path_cc} = $hval->as_path;

    return 'diff --' . $combined . ' ' . $shown;
}
# @@ -1,2 +3,4 @@ (regular diff)
# Render the header of a regular two-way hunk.
#   $req - request hashref; relcmd, p->[0] and path_b are read here
#   $ca  - old-side range token (e.g. "-1,2")
#   $cb  - new-side range token (e.g. "+3,4")
#   $ctx - text that followed the closing "@@"; passed through utf8_html()
# A side whose starting line number is 0 (old side of a new file, new side
# of a deleted file) is emitted as the bare range token.  Any other side
# gets a prefix carrying an optional "#n<line>" fragment before the token.
# Returns the header line without a trailing newline.
#
# NOTE(review): the two qq() literals below start with "{path_a}" and
# "{-tip}/" and end with '">'; they look like the tail of link markup
# whose opening part is missing, and $rel / $p are read but never used.
# Confirm the intended output before relying on this.
sub git_diff_ab_hunk ($$$$) {
    my ($req, $ca, $cb, $ctx) = @_;
    my $old_start = ($ca =~ /\A-(\d+)/) ? $1 : undef;
    my $new_start = ($cb =~ /\A\+(\d+)/) ? $1 : undef;
    my $rel = $req->{relcmd};

    my $old_side;
    if (defined($old_start) && $old_start == 0) { # new file
        $old_side = $ca;
    } else {
        my $frag = defined($old_start) ? "#n$old_start" : '';
        my $p = $req->{p}->[0];
        $old_side = qq({path_a}$frag">) . $ca;
    }

    my $new_side;
    if (defined($new_start) && $new_start == 0) { # deleted file
        $new_side = $cb;
    } else {
        my $frag = defined($new_start) ? "#n$new_start" : '';
        $new_side = qq({-tip}/$req->{path_b}$frag">) . $cb;
    }

    return '@@ ' . $old_side . ' ' . $new_side . ' @@' . utf8_html($ctx);
}
# index abcdef09,01234567..76543210
# Handle the "index" line of a combined diff.
#   $req    - request hashref; pobj_cc is set to an arrayref holding the
#             comma-separated IDs from $before (read by git_diff_cc_hunk)
#   $before - the comma-separated IDs left of ".."
#   $last   - the ID right of ".."
#   $end    - the rest of the line; passed through utf8_html()
# Returns the reassembled line without a trailing newline.
# No links are emitted here (yet); links in hunk headers are far more
# useful because they can carry line offsets.
sub git_diff_cc_index {
    my ($req, $before, $last, $end) = @_;
    my $tail = utf8_html($end);
    $req->{pobj_cc} = [ split(',', $before) ];
    return 'index ' . $before . '..' . $last . $tail;
}
# @@@ -1,2 -3,4 +5,6 @@@ (combined diff)
# Render the hunk header of a combined (--cc / --combined) diff.
#   $req  - request hashref; p, pobj_cc, path_cc, relcmd and H are read
#   $at   - the run of "@" characters that opened the header (e.g. "@@@")
#   $offs - the whitespace-separated range tokens between the "@" runs;
#           the final token belongs to the result, the others to parents
#   $ctx  - text that followed the closing "@" run; passed through
#           utf8_html()
# Returns the header line without a trailing newline.  Dies (under
# strict refs) if $req->{p} or $req->{pobj_cc} is not an array reference.
#
# A range token that does not look like "-<digits>" / "+<digits>" leaves
# the line number undefined; it is now checked with defined() (as
# git_diff_ab_hunk does) instead of being compared numerically, which
# raised an "uninitialized value" warning.
#
# NOTE(review): as written, both arms of each if/else append the same
# text, and @p, @pobj, $ppath, $p, $obj and $H are computed but never
# used.  This looks like link markup was lost from the string literals;
# confirm the intended output before removing the unused values.
sub git_diff_cc_hunk {
    my ($req, $at, $offs, $ctx) = @_;
    my @offs = split(' ', $offs);
    my $last = pop @offs;
    $last = '' unless defined $last; # $offs was empty or all whitespace
    my @p = @{$req->{p}};
    my @pobj = @{$req->{pobj_cc}};
    my $path = $req->{path_cc};
    my $rel = $req->{relcmd};
    my $rv = $at;

    # special 'cc' action as we don't have reliable paths from parents
    my $ppath = "${rel}cc/$path";
    foreach my $off (@offs) {
        my $p = shift @p;
        my $obj = shift @pobj; # blob SHA-1
        my ($n) = ($off =~ /\A-(\d+)/); # line number

        if (defined($n) && $n == 0) { # new file (does this happen with --cc?)
            $rv .= " $off";
        } else {
            $rv .= " ";
            $rv .= "$off";
        }
    }

    # we can use the normal 'tree' endpoint for the result
    my ($n) = ($last =~ /\A\+(\d+)/); # line number
    if (defined($n) && $n == 0) { # deleted file (does this happen with --cc?)
        $rv .= " $last";
    } else {
        my $H = $req->{H};
        $rv .= qq( $last);
    }
    $rv .= " $at" . utf8_html($ctx);
}
# Format one rename entry of the diffstat.
#   $req  - request hashref; the destination is recorded in
#           $req->{anchors} under the to_attr() form of its unquoted name
#   $from - source path
#   $to   - destination path
# Leading path components shared by both names are factored out, giving
# "common/{old => new}"; with nothing in common the result is
# "old => new".  Each part is passed through utf8_html() (source, common
# prefix) or rendered via PublicInbox::Hval->as_html (destination).
#
# NOTE(review): the as_path result is never used and the destination is
# emitted as plain as_html text; this looks like leftover link support.
sub git_diffstat_rename ($$$) {
    my ($req, $from, $to) = @_;
    my $anchor = to_attr(git_unquote($to));
    $req->{anchors}->{$anchor} = $to;

    my $orig_to = $to;
    my @src = split('/', $from);
    my @dst = split('/', $to);

    # peel off the directory components both names share
    my @common;
    until (!@dst || !@src || $dst[0] ne $src[0]) {
        push @common, shift(@dst);
        shift @src;
    }

    my $prefix;
    $prefix = utf8_html(join('/', @common)) if @common;
    my $src_html = utf8_html(join('/', @src));
    my $hval = PublicInbox::Hval->utf8(join('/', @dst), $orig_to);
    my $tp = $hval->as_path;
    my $dst_html = $hval->as_html;

    return "$src_html => $dst_html" unless @common;
    return "$prefix/{$src_html => $dst_html}";
}
# Parser states kept in $req->{dstate}:
#   DSTATE_INIT  - value assigned by git_diff_sed_init
#   DSTATE_STAT  - git_diff_sed_run feeds the buffer to git_diff_sed_stat
#   DSTATE_LINES - git_diff_sed_run feeds the buffer to git_diff_sed_lines
use constant {
    DSTATE_INIT  => 0,
    DSTATE_STAT  => 1,
    DSTATE_LINES => 2,
};
# Reset the per-request diff parsing state in $req:
#   dbuf   - pending input buffer, emptied
#   -tip   - result of calling ->tip on $req->{-repo}
#   ndiff, nchg, nadd, ndel - counters, zeroed
#   dstate - set to DSTATE_INIT
# The final assignment is deliberately the last statement so the implicit
# return value stays the same.
sub git_diff_sed_init ($) {
    my ($req) = @_;
    $req->{dbuf} = '';
    $req->{-tip} = $req->{-repo}->tip;
    foreach my $counter (qw(ndel nadd nchg ndiff)) {
        $req->{$counter} = 0;
    }
    $req->{dstate} = DSTATE_INIT;
}
# Consume NUL-separated diffstat records from $req->{dbuf} and append one
# formatted line per file to $$dst.
#   $dst - reference to the output string
#   $req - request hashref; reads/writes dbuf, dstat_started, anchors,
#          nchg, nadd, ndel and dstate; reads mhelp
# May be called repeatedly as more input is buffered: records that cannot
# be completed yet are pushed back and left in dbuf.  Once the end of the
# diffstat is seen, dstate becomes DSTATE_LINES and the "N files changed"
# summary is appended.
sub git_diff_sed_stat ($$) {
my ($dst, $req) = @_;
# limit -1 keeps trailing empty fields, so a terminating NUL is visible
my @stat = split(/\0/, $req->{dbuf}, -1);
# set once the end of the diffstat has been recognized
my $eos;
# references so the running totals in $req are updated in place
my $nchg = \($req->{nchg});
my $nadd = \($req->{nadd});
my $ndel = \($req->{ndel});
# one-time cleanup of the very first field
if (!$req->{dstat_started}) {
$req->{dstat_started} = 1;
# merges start with an extra '\0' before the diffstat
# non-merge commits start with an extra '\n', instead
if ($req->{mhelp}) {
if ($stat[0] eq '') {
shift @stat;
} else {
warn
'initial merge diffstat line was not empty';
}
} else {
# for commits, only (not diff-tree)
$stat[0] =~ s/\A\n//s;
}
}
# NOTE(review): the last element from split() may be an unterminated,
# partial record; apart from the rename case below it is processed like
# a complete one -- confirm callers only buffer whole records.
while (defined(my $l = shift @stat)) {
if ($l eq '') {
# an empty field ends this pass; it only counts as the end of the
# diffstat when the following field is already buffered and starts
# with "d"
$eos = 1 if $stat[0] && $stat[0] =~ /\Ad/; # "diff --"
last;
} elsif ($l =~ /\Adiff /) {
# start of the diff itself: push it back for git_diff_sed_lines
unshift @stat, $l;
$eos = 1;
last;
}
# expected shape: <added> TAB <deleted> TAB <name>; anything else is skipped
$l =~ /\A(\S+)\t+(\S+)\t+(.*)/ or next;
my ($add, $del, $fn) = ($1, $2, $3);
if ($fn ne '') { # normal modification
# TODO: discard diffs if they are too big
# gigantic changes with many files may still OOM us
my $anchor = to_attr(git_unquote($fn));
$req->{anchors}->{$anchor} = $fn;
$l = utf8_html($fn);
# NOTE(review): this re-quotes $l unchanged and $anchor is only used as
# the hash key above; presumably link markup is missing here -- confirm
$l = qq($l);
} else { # rename
# an empty name means the next two fields hold the old and new names
# incomplete...
if (scalar(@stat) < 2) {
# not enough buffered yet: push the record back and wait for more input
unshift @stat, $l;
last;
}
my $from = shift @stat;
my $to = shift @stat;
$l = git_diffstat_rename($req, $from, $to);
}
# text changes show numerically, Binary does not
if ($add =~ /\A\d+\z/) {
$$nadd += $add;
$$ndel += $del;
$add = "+$add";
$del = "-$del";
}
++$$nchg;
# deletions right-aligned, additions left-aligned, each in six columns
my $num = sprintf('% 6s/%-6s', $del, $add);
$$dst .= " $num\t$l\n";
}
# keep whatever was not consumed for the next call
$req->{dbuf} = join("\0", @stat);
return unless $eos;
# end of diffstat: switch to line mode and emit the totals
$req->{dstate} = DSTATE_LINES;
$$dst .= "\n $$nchg ";
$$dst .= $$nchg == 1 ? 'file changed, ' : 'files changed, ';
$$dst .= $$nadd;
$$dst .= $$nadd == 1 ? ' insertion(+), ' : ' insertions(+), ';
$$dst .= $$ndel;
$$dst .= $$ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n";
}
# Consume complete lines of diff output from $req->{dbuf} and append the
# rendered form of each to $$dst.
#   $dst - reference to the output string
#   $req - request hashref; reads/writes dbuf and ndiff, consumes mhelp,
#          and is passed on to the per-line handlers
# The text after the final newline may be an unterminated line, so it is
# left in dbuf for the next call (or for git_diff_sed_close).
# An empty or missing buffer is tolerated: dbuf is simply reset to ''
# instead of appending an undefined value, which raised an
# "uninitialized value" warning.
sub git_diff_sed_lines ($$) {
    my ($dst, $req) = @_;

    # TODO: discard diffs if they are too big
    my $buf = $req->{dbuf};
    $buf = '' unless defined $buf;
    my @dlines = split(/\n/, $buf, -1);

    if (my $help = delete $req->{mhelp}) {
        $$dst .= $help; # CC_MERGE
    }

    # don't touch the last line, it may not be terminated;
    # split() returns nothing at all for an empty buffer
    my $partial = pop @dlines;
    $req->{dbuf} = defined($partial) ? $partial : '';

    my $ndiff = \($req->{ndiff});
    my $cmt = '[a-f0-9]+';
    foreach my $l (@dlines) {
        # order matters: the regular "index" pattern is tried before
        # the combined one
        if ($l =~ m{\Adiff --git ("?a/.+) ("?b/.+)\z}) { # regular
            $$dst .= git_diff_ab_hdr($req, $1, $2) . "\n";
        } elsif ($l =~ m{\Adiff --(cc|combined) (.+)\z}) {
            $$dst .= git_diff_cc_hdr($req, $1, $2) . "\n";
        } elsif ($l =~ /\Aindex ($cmt)\.\.($cmt)(.*)\z/o) { # regular
            $$dst .= git_diff_ab_index($1, $2, $3) . "\n";
        } elsif ($l =~ /\A@@ (\S+) (\S+) @@(.*)\z/) { # regular
            $$dst .= git_diff_ab_hunk($req, $1, $2, $3) . "\n";
        } elsif ($l =~ /\Aindex ($cmt,[^\.]+)\.\.($cmt)(.*)\z/o) { #--cc
            $$dst .= git_diff_cc_index($req, $1, $2, $3) . "\n";
        } elsif ($l =~ /\A(@@@+) (\S+.*\S+) @@@+(.*)\z/) { # --cc
            $$dst .= git_diff_cc_hunk($req, $1, $2, $3) . "\n";
        } else {
            $$dst .= utf8_html($l) . "\n";
        }
        ++$$ndiff;
    }
}
# Process whatever is buffered in $req->{dbuf} according to the current
# parser state, appending rendered output to $$dst.
#   $dst - reference to the output string
#   $req - request hashref carrying dstate and dbuf
# The two checks are sequential on purpose: git_diff_sed_stat may switch
# dstate to DSTATE_LINES, in which case the rest of the buffer is handed
# to git_diff_sed_lines within the same call.
# Always returns undef.
sub git_diff_sed_run ($$) {
    my ($dst, $req) = @_;
    if ($req->{dstate} == DSTATE_STAT) {
        git_diff_sed_stat($dst, $req);
    }
    if ($req->{dstate} == DSTATE_LINES) {
        git_diff_sed_lines($dst, $req);
    }
    return undef;
}
# Flush the remaining buffered text once the input has ended.
#   $dst - reference to the output string
#   $req - request hashref; its dbuf entry is removed
# Whatever is left in dbuf (e.g. a final line without a newline) is
# passed through utf8_html() and appended to $$dst.
# Always returns undef.
sub git_diff_sed_close ($$) {
    my ($dst, $req) = @_;
    my $rest = delete $req->{dbuf};
    $$dst .= utf8_html($rest);
    return undef;
}
1;