From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B999421422 for ; Mon, 21 Jan 2019 20:52:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 09/37] view: wire up diff and vcs viewers with solver Date: Mon, 21 Jan 2019 20:52:25 +0000 Message-Id: <20190121205253.10455-10-e@80x24.org> In-Reply-To: <20190121205253.10455-1-e@80x24.org> References: <20190121205253.10455-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: --- MANIFEST | 2 + lib/PublicInbox/Config.pm | 59 ++++++++++++++- lib/PublicInbox/View.pm | 47 +++++++++--- lib/PublicInbox/ViewDiff.pm | 147 ++++++++++++++++++++++++++++++++++++ lib/PublicInbox/ViewVCS.pm | 87 +++++++++++++++++++++ lib/PublicInbox/WWW.pm | 18 ++++- 6 files changed, 345 insertions(+), 15 deletions(-) create mode 100644 lib/PublicInbox/ViewDiff.pm create mode 100644 lib/PublicInbox/ViewVCS.pm diff --git a/MANIFEST b/MANIFEST index 95ad0c6..5e980fe 100644 --- a/MANIFEST +++ b/MANIFEST @@ -109,6 +109,8 @@ lib/PublicInbox/SpawnPP.pm lib/PublicInbox/Unsubscribe.pm lib/PublicInbox/V2Writable.pm lib/PublicInbox/View.pm +lib/PublicInbox/ViewDiff.pm +lib/PublicInbox/ViewVCS.pm lib/PublicInbox/WWW.pm lib/PublicInbox/WWW.pod lib/PublicInbox/WatchMaildir.pm diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index bea2617..355e64b 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -2,12 +2,19 @@ # License: AGPL-3.0+ # # Used throughout the project for reading configuration +# +# Note: I hate camelCase; but git-config(1) uses it, but it's better +# than alllowercasewithoutunderscores, so use lc('configKey') where +# applicable for readability + package PublicInbox::Config; use strict; use warnings; require PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); +sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] } + # returns key-value pairs of config directives in a hash # if keys may be multi-value, the value is an array ref containing all values sub new { @@ -22,6 +29,7 @@ sub new { $self->{-by_newsgroup} ||= {}; $self->{-no_obfuscate} ||= {}; $self->{-limiters} ||= {}; + $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object if (my $no = delete $self->{'publicinbox.noobfuscate'}) { $no = [ $no ] if ref($no) ne 'ARRAY'; @@ -169,6 +177,41 @@ sub valid_inbox_name ($) { 1; } +# parse a code repo +# Only git is supported at the moment, but SVN and Hg are possibilities +sub _fill_code_repo { + my ($self, $nick) = @_; + my $pfx = "coderepo.$nick"; + + my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR" + unless (defined $dir) { + warn "$pfx.repodir unset"; + return; + } + + my $git = PublicInbox::Git->new($dir); + foreach my $t (qw(blob commit tree tag)) { + $git->{$t.'_url_format'} = + _array($self->{lc("$pfx.${t}UrlFormat")}); + } + + if (my $cgits = $self->{lc("$pfx.cgitUrl")}) { + $git->{cgit_url} = $cgits = _array($cgits); + + # cgit supports "/blob/?id=%s", but it's only a plain-text + # display and requires an unabbreviated id= + foreach my $t (qw(blob commit tag)) { + $git->{$t.'_url_format'} ||= map { + "$_/$t/?id=%s" + } @$cgits; + } + } + # TODO: support gitweb and other repository viewers? + # TODO: parse cgitrc + + $git; +} + sub _fill { my ($self, $pfx) = @_; my $rv = {}; @@ -192,9 +235,9 @@ sub _fill { } # TODO: more arrays, we should support multi-value for # more things to encourage decentralization - foreach my $k (qw(address altid nntpmirror)) { + foreach my $k (qw(address altid nntpmirror coderepo)) { if (defined(my $v = $self->{"$pfx.$k"})) { - $rv->{$k} = ref($v) eq 'ARRAY' ? $v : [ $v ]; + $rv->{$k} = _array($v); } } @@ -224,6 +267,18 @@ sub _fill { $rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re}; each_inbox($self, sub {}); # noop to populate -no_obfuscate } + + if (my $ibx_code_repos = $rv->{coderepo}) { + my $code_repos = $self->{-code_repos}; + my $repo_objs = $rv->{-repo_objs} = []; + foreach my $nick (@$ibx_code_repos) { + valid_inbox_name($nick) or next; + my $repo = $code_repos->{$nick} ||= + _fill_code_repo($self, $nick); + push @$repo_objs, $repo if $repo; + } + } + $rv } diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 470e3ab..0187ec3 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -14,6 +14,7 @@ use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream; use PublicInbox::Reply; +use PublicInbox::ViewDiff qw(flush_diff); require POSIX; use Time::Local qw(timegm); @@ -28,7 +29,7 @@ sub msg_html { my ($ctx, $mime, $more, $smsg) = @_; my $hdr = $mime->header_obj; my $ibx = $ctx->{-inbox}; - my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; + $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; my $tip = _msg_html_prepare($hdr, $ctx, $more, 0); my $end = 2; PublicInbox::WwwStream->response($ctx, 200, sub { @@ -36,7 +37,7 @@ sub msg_html { if ($nr == 1) { # $more cannot be true w/o $smsg being defined: my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : ''; - $tip . multipart_text_as_html($mime, $upfx, $obfs_ibx) . + $tip . multipart_text_as_html($mime, $upfx, $ibx) . '
' } elsif ($more && @$more) { ++$end; @@ -81,15 +82,15 @@ sub msg_html_more { my $str = eval { my ($id, $prev, $smsg) = @$more; my $mid = $ctx->{mid}; - $smsg = $ctx->{-inbox}->smsg_mime($smsg); + my $ibx = $ctx->{-inbox}; + $smsg = $ibx->smsg_mime($smsg); my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev); @$more = $next ? ($id, $prev, $next) : (); if ($smsg) { my $mime = $smsg->{mime}; my $upfx = '../' . mid_escape($smsg->mid) . '/'; _msg_html_prepare($mime->header_obj, $ctx, $more, $nr) . - multipart_text_as_html($mime, $upfx, - $ctx->{-obfs_ibx}) . + multipart_text_as_html($mime, $upfx, $ibx) . '
' } else { ''; @@ -260,7 +261,8 @@ sub index_entry { $rv .= "\n"; # scan through all parts, looking for displayable text - msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) }); + my $ibx = $ctx->{-inbox}; + msg_iter($mime, sub { $rv .= add_text_body($mhref, $ibx, $_[0]) }); # add the footer $rv .= "\n^ ". @@ -488,11 +490,11 @@ sub thread_html { } sub multipart_text_as_html { - my ($mime, $upfx, $obfs_ibx) = @_; + my ($mime, $upfx, $ibx) = @_; my $rv = ""; # scan through all parts, looking for displayable text - msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) }); + msg_iter($mime, sub { $rv .= add_text_body($upfx, $ibx, $_[0]) }); $rv; } @@ -545,7 +547,8 @@ sub attach_link ($$$$;$) { } sub add_text_body { - my ($upfx, $obfs_ibx, $p) = @_; + my ($upfx, $ibx, $p) = @_; + my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; # $p - from msg_iter: [ Email::MIME, depth, @idx ] my ($part, $depth) = @$p; # attachment @idx is unused my $ct = $part->content_type || 'text/plain'; @@ -554,6 +557,19 @@ sub add_text_body { return attach_link($upfx, $ct, $p, $fn) unless defined $s; + my ($diff, $spfx); + if ($ibx->{-repo_objs} && $s =~ /^(?:diff|---|\+{3}) /ms) { + $diff = []; + my $n_slash = $upfx =~ tr!/!/!; + if ($n_slash == 0) { + $spfx = '../'; + } elsif ($n_slash == 1) { + $spfx = ''; + } else { # nslash == 2 + $spfx = '../../'; + } + }; + my @lines = split(/^/m, $s); $s = ''; if (defined($fn) || $depth > 0 || $err) { @@ -568,19 +584,26 @@ sub add_text_body { # show the previously buffered quote inline flush_quote(\$s, $l, \@quot) if @quot; - # regular line, OK - $l->linkify_1($cur); - $s .= $l->linkify_2(ascii_html($cur)); + if ($diff) { + push @$diff, $cur; + } else { + # regular line, OK + $l->linkify_1($cur); + $s .= $l->linkify_2(ascii_html($cur)); + } } else { + flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff; push @quot, $cur; } } if (@quot) { # ugh, top posted flush_quote(\$s, $l, \@quot); + flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff; obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; $s; } else { + flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff; obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; if ($s =~ /\n\z/s) { # common, last line ends with a newline $s; diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm new file mode 100644 index 0000000..ee450fa --- /dev/null +++ b/lib/PublicInbox/ViewDiff.pm @@ -0,0 +1,147 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +# +# used by PublicInbox::View +package PublicInbox::ViewDiff; +use strict; +use warnings; +use base qw(Exporter); +our @EXPORT_OK = qw(flush_diff); + +use PublicInbox::Hval qw(ascii_html); +use PublicInbox::Git qw(git_unquote); + +sub DSTATE_INIT () { 0 } +sub DSTATE_STAT () { 1 } # TODO +sub DSTATE_HEAD () { 2 } # /^diff --git /, /^index /, /^--- /, /^\+\+\+ / +sub DSTATE_HUNK () { 3 } # /^@@ / +sub DSTATE_CTX () { 4 } # /^ / +sub DSTATE_ADD () { 5 } # /^\+/ +sub DSTATE_DEL () { 6 } # /^\-/ + +my $OID_NULL = '0{7,40}'; +my $OID_BLOB = '[a-f0-9]{7,40}'; +my $PATH_A = '"?a/.+|/dev/null'; +my $PATH_B = '"?b/.+|/dev/null'; + +sub to_html ($$) { + $_[0]->linkify_1($_[1]); + $_[0]->linkify_2(ascii_html($_[1])); +} + +# link to line numbers in blobs +sub diff_hunk ($$$$) { + my ($dctx, $spfx, $ca, $cb) = @_; + my $oid_a = $dctx->{oid_a}; + my $oid_b = $dctx->{oid_b}; + + (defined($oid_a) && defined($oid_b)) or return "@@ $ca $cb @@"; + + my ($n) = ($ca =~ /^-(\d+)/); + $n = defined($n) ? do { ++$n; "#n$n" } : ''; + + my $rv = qq(@@ $ca); + + ($n) = ($cb =~ /^\+(\d+)/); + $n = defined($n) ? do { ++$n; "#n$n" } : ''; + + $rv .= qq( $cb @@); +} + +sub flush_diff ($$$$) { + my ($dst, $spfx, $linkify, $diff) = @_; + my $state = DSTATE_INIT; + my $dctx; # {}, keys: oid_a, oid_b, path_a, path_b + + foreach my $s (@$diff) { + if ($s =~ /^ /) { + if ($state == DSTATE_HUNK || $state == DSTATE_ADD || + $state == DSTATE_DEL || $state == DSTATE_HEAD) { + $$dst .= ""; + $state = DSTATE_CTX; + } + $$dst .= to_html($linkify, $s); + } elsif ($s =~ /^-- $/) { # email signature begins + if ($state != DSTATE_INIT) { + $state = DSTATE_INIT; + $$dst .= ''; + } + $$dst .= $s; + } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!x) { + if ($state != DSTATE_HEAD) { + my ($pa, $pb) = ($1, $2); + $$dst .= '' if $state != DSTATE_INIT; + $$dst .= ""; + $state = DSTATE_HEAD; + $pa = (split('/', git_unquote($pa), 2))[1]; + $pb = (split('/', git_unquote($pb), 2))[1]; + $dctx = { path_a => $pa, path_b => $pb }; + } + $$dst .= to_html($linkify, $s); + } elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) { + $$dst .= qq($1$2); + $$dst .= to_html($linkify, $s) ; + } elsif ($s =~ s/^index ($OID_NULL)(\.\.$OID_BLOB)\b//o) { + $$dst .= 'index '; + $$dst .= qq($1$2); + $$dst .= to_html($linkify, $s); + } elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) { + $dctx->{oid_a} = $1; + $dctx->{oid_b} = $2; + $$dst .= to_html($linkify, $s); + } elsif ($s =~ s/^@@ (\S+) (\S+) @@//) { + my ($ca, $cb) = ($1, $2); + if ($state == DSTATE_HEAD || $state == DSTATE_CTX || + $state == DSTATE_ADD || $state == DSTATE_DEL) { + $$dst .= ""; + $state = DSTATE_HUNK; + $$dst .= diff_hunk($dctx, $spfx, $ca, $cb); + } else { + $$dst .= to_html($linkify, "@@ $ca $cb @@"); + } + $$dst .= to_html($linkify, $s); + } elsif ($s =~ m!^--- $PATH_A!) { + if ($state == DSTATE_INIT) { # color only (no oid link) + $state = DSTATE_HEAD; + $$dst .= ""; + } + $$dst .= to_html($linkify, $s); + } elsif ($s =~ m!^\+{3} $PATH_B!) { + if ($state == DSTATE_INIT) { # color only (no oid link) + $state = DSTATE_HEAD; + $$dst .= ""; + } + $$dst .= to_html($linkify, $s); + } elsif ($s =~ /^\+/) { + if ($state != DSTATE_ADD && $state != DSTATE_INIT) { + $$dst .= ""; + $state = DSTATE_ADD; + } + $$dst .= to_html($linkify, $s); + } elsif ($s =~ /^-/) { + if ($state != DSTATE_DEL && $state != DSTATE_INIT) { + $$dst .= ""; + $state = DSTATE_DEL; + } + $$dst .= to_html($linkify, $s); + # ignore the following lines in headers: + } elsif ($s =~ /^(?:dis)similarity index/ || + $s =~ /^(?:old|new) mode/ || + $s =~ /^(?:deleted|new) file mode/ || + $s =~ /^(?:copy|rename) (?:from|to) / || + $s =~ /^(?:dis)?similarity index /) { + $$dst .= to_html($linkify, $s); + } else { + if ($state != DSTATE_INIT) { + $$dst .= ''; + $state = DSTATE_INIT; + } + $$dst .= to_html($linkify, $s); + } + } + @$diff = (); + $$dst .= '' if $state != DSTATE_INIT; + undef; +} + +1; diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm new file mode 100644 index 0000000..49fb1c5 --- /dev/null +++ b/lib/PublicInbox/ViewVCS.pm @@ -0,0 +1,87 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ + +# show any VCS object, similar to "git show" +package PublicInbox::ViewVCS; +use strict; +use warnings; +use Encode qw(find_encoding); +use PublicInbox::SolverGit; +use PublicInbox::WwwStream; +use PublicInbox::Linkify; +use PublicInbox::Hval qw(ascii_html); +my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' ); +my $max_size = 1024 * 1024; # TODO: configurable +my $enc_utf8 = find_encoding('UTF-8'); + +sub html_page ($$$) { + my ($ctx, $code, $strref) = @_; + $ctx->{-upfx} = '../'; # from "/$INBOX/$OID/s" + PublicInbox::WwwStream->response($ctx, $code, sub { + my ($nr, undef) = @_; + $nr == 1 ? $$strref : undef; + }); +} + +sub show ($$;$) { + my ($ctx, $oid_b, $fn) = @_; + my $ibx = $ctx->{-inbox}; + my $inboxes = [ $ibx ]; + my $solver = PublicInbox::SolverGit->new($ibx->{-repo_objs}, $inboxes); + my $qp = $ctx->{qp}; + my $hints = {}; + while (my ($from, $to) = each %QP_MAP) { + defined(my $v = $qp->{$from}) or next; + $hints->{$to} = $v; + } + + open my $log, '+>', undef or die "open: $!"; + my $res = $solver->solve($log, $oid_b, $hints); + + seek($log, 0, 0) or die "seek: $!"; + $log = do { local $/; <$log> }; + + my $l = PublicInbox::Linkify->new; + $l->linkify_1($log); + $log = '
debug log:

' .
+		$l->linkify_2(ascii_html($log)) . '
'; + + $res or return html_page($ctx, 404, \$log); + + my ($git, $oid, $type, $size, $di) = @$res; + if ($size > $max_size) { + # TODO: stream the raw file if it's gigantic, at least + $log = '
Too big to show
' . $log; + return html_page($ctx, 500, \$log); + } + + my $blob = $git->cat_file($oid); + if (!$blob) { # WTF? + my $e = "Failed to retrieve generated blob ($oid)"; + $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n"); + $log = "
$e
" . $log; + return html_page($ctx, 500, \$log); + } + + if (index($$blob, "\0") >= 0) { + $log = "
$oid $type $size bytes (binary)
" . $log; + return html_page($ctx, 200, \$log); + } + + $$blob = $enc_utf8->decode($$blob); + my $nl = ($$blob =~ tr/\n/\n/); + my $pad = length($nl); + + # using some of the same CSS class names and ids as cgit + $log = "
$oid $type $size bytes

". + "
" . join('', map {
+			sprintf("% ${pad}u\n", $_)
+		} (1..$nl)) . '
' . + '
 
'. # pad for non-CSS users + "
" .  ascii_html($$blob) .
+		'
' . $log; + + html_page($ctx, 200, \$log); +} + +1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 3562e46..c73370f 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -25,6 +25,7 @@ our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i; +our $OID_RE = qr![a-f0-9]{7,40}!; sub new { my ($class, $pi_config) = @_; @@ -117,7 +118,10 @@ sub call { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) { get_text($ctx, $1, $2); - + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) { + get_vcs_object($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/_([\w\.\-]+)\z!o) { + get_vcs_object($ctx, $1, $2, $3); # convenience redirects order matters } elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) { r301($ctx, $1, $2); @@ -259,6 +263,18 @@ sub get_text { PublicInbox::WwwText::get_text($ctx, $key); } +# show git objects (blobs and commits) +# /$INBOX/_/$OBJECT_ID/show +# /$INBOX/_/${OBJECT_ID}_${FILENAME} +# KEY may contain slashes +sub get_vcs_object ($$$;$) { + my ($ctx, $inbox, $oid, $filename) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + require PublicInbox::ViewVCS; + PublicInbox::ViewVCS::show($ctx, $oid, $filename); +} + sub ctx_get { my ($ctx, $key) = @_; my $val = $ctx->{$key}; -- EW