From 53ac5b18f6f124fe33bf6736aac0b8c85a0d0d1b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Jan 2019 08:22:41 +0000 Subject: solver: initial Perl implementation This will look up git blobs from associated git source code repositories. If the blobs can't be found, an attempt to "solve" them via patch application will be performed. Eventually, this may become the basis of a type-agnostic frontend similar to "git show". --- lib/PublicInbox/Git.pm | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 90b9214a..9676086f 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -40,6 +40,7 @@ sub new { my ($class, $git_dir) = @_; my @st; $st[7] = $st[10] = 0; + # may contain {-wt} field (working-tree (File::Temp::Dir)) bless { git_dir => $git_dir, st => \@st }, $class } @@ -201,6 +202,21 @@ sub packed_bytes { sub DESTROY { cleanup(@_) } +# show the blob URL for cgit/gitweb/whatever +sub src_blob_url { + my ($self, $oid) = @_; + # blob_fmt = "https://example.com/foo.git/blob/%s" + if (my $bfu = $self->{blob_fmt_url}) { + return sprintf($bfu, $oid); + } + + # don't show full FS path, basename should be OK: + if ($self->{git_dir} =~ m!/([^/]+)\z!) { + return "/path/to/$1"; + } + '???'; +} + 1; __END__ =pod -- cgit v1.2.3-24-ge0c7 From 8d1766ef6c1ed2a390fe0313f9b8e34475c1d874 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 16 Jan 2019 21:53:07 +0000 Subject: git: support multiple URL endpoints For redundancy and centralization resistance. 
--- lib/PublicInbox/Git.pm | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 9676086f..a270180c 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -202,19 +202,33 @@ sub packed_bytes { sub DESTROY { cleanup(@_) } +sub local_nick ($) { + my ($self) = @_; + my $ret = '???'; + # don't show full FS path, basename should be OK: + if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) { + $ret = "/path/to/$1"; + } + wantarray ? ($ret) : $ret; +} + # show the blob URL for cgit/gitweb/whatever sub src_blob_url { my ($self, $oid) = @_; - # blob_fmt = "https://example.com/foo.git/blob/%s" - if (my $bfu = $self->{blob_fmt_url}) { - return sprintf($bfu, $oid); + # blob_url_format = "https://example.com/foo.git/blob/%s" + if (my $bfu = $self->{blob_url_format}) { + return map { sprintf($_, $oid) } @$bfu if wantarray; + return sprintf($bfu->[0], $oid); } + local_nick($self); +} - # don't show full FS path, basename should be OK: - if ($self->{git_dir} =~ m!/([^/]+)\z!) { - return "/path/to/$1"; +sub pub_urls { + my ($self) = @_; + if (my $urls = $self->{cgit_url}) { + return @$urls; } - '???'; + local_nick($self); } 1; -- cgit v1.2.3-24-ge0c7 From c8a4111320aaed484deecbbc7d1f63f38f3dc57b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2019 10:21:40 +0000 Subject: git: add git_quote It'll be helpful for displaying progress in SolverGit output. 
--- lib/PublicInbox/Git.pm | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index a270180c..d0ac6b6c 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -13,7 +13,7 @@ use POSIX qw(dup2); require IO::Handle; use PublicInbox::Spawn qw(spawn popen_rd); use base qw(Exporter); -our @EXPORT_OK = qw(git_unquote); +our @EXPORT_OK = qw(git_unquote git_quote); my %GIT_ESC = ( a => "\a", @@ -26,6 +26,8 @@ my %GIT_ESC = ( '"' => '"', '\\' => '\\', ); +my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC; + # unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git sub git_unquote ($) { @@ -36,6 +38,14 @@ sub git_unquote ($) { $_[0]; } +sub git_quote ($) { + if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/ + '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) { + return qq{"$_[0]"}; + } + $_[0]; +} + sub new { my ($class, $git_dir) = @_; my @st; -- cgit v1.2.3-24-ge0c7 From afd27ef5d80792091de97363c563679880083d54 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2019 05:27:51 +0000 Subject: git: check saves error on disambiguation This will be useful for disambiguating short OIDs in older emails when abbreviations were shorter. 
Tested against the following script with /path/to/git.git ==> t.perl <== use strict; use PublicInbox::Git; use Data::Dumper; my $dir = shift or die "Usage: $0 GIT_DIR # (of git.git)"; my $git = PublicInbox::Git->new($dir); my @res = $git->check('dead'); print Dumper({res => \@res, err=> $git->last_check_err}); @res = $git->check('5335669531d83d7d6c905bcfca9b5f8e182dc4d4'); print Dumper({res => \@res, err=> $git->last_check_err}); --- lib/PublicInbox/Git.pm | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index d0ac6b6c..a61cb31b 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -64,9 +64,25 @@ sub alternates_changed { $self->{st} = \@st; } +sub last_check_err { + my ($self) = @_; + my $fh = $self->{err_c} or return; + sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!"); + defined(sysread($fh, my $buf, -s $fh)) or + fail($self, "sysread failed: $!"); + $buf; +} + sub _bidi_pipe { - my ($self, $batch, $in, $out, $pid) = @_; - return if $self->{$pid}; + my ($self, $batch, $in, $out, $pid, $err) = @_; + if ($self->{$pid}) { + if (defined $err) { # "err_c" + my $fh = $self->{$err}; + sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!"); + truncate($fh, 0) or fail($self, "truncate failed: $!"); + } + return; + } my ($in_r, $in_w, $out_r, $out_w); pipe($in_r, $in_w) or fail($self, "pipe failed: $!"); @@ -78,6 +94,11 @@ sub _bidi_pipe { my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) }; + if ($err) { + open(my $fh, '+>', undef) or fail($self, "open.err failed: $!"); + $self->{$err} = $fh; + $redir->{2} = fileno($fh); + } my $p = spawn(\@cmd, undef, $redir); defined $p or fail($self, "spawn failed: $!"); $self->{$pid} = $p; @@ -152,12 +173,23 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) } sub check { my 
($self, $obj) = @_; - $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c)); + _bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c)); $self->{out_c}->print($obj, "\n") or fail($self, "write error: $!"); local $/ = "\n"; chomp(my $line = $self->{in_c}->getline); my ($hex, $type, $size) = split(' ', $line); return if $type eq 'missing'; + + # "dead" in git.git shows "dangling 4\ndead\n", not sure why + # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/ + # so handle the oddball stuff just in case + if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') { + $size = $type + length("\n"); + my $r = read($self->{in_c}, my $buf, $size); + defined($r) or fail($self, "read failed: $!"); + return; + } + ($hex, $type, $size); } -- cgit v1.2.3-24-ge0c7 From 77d9f14ee83324afc5961c0115480baa5ff5b475 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2019 06:09:24 +0000 Subject: git: disable abbreviations with cat-file hints Ambiguity is not worth it for internal usage with the solver. 
--- lib/PublicInbox/Git.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index a61cb31b..b655921b 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -92,7 +92,8 @@ sub _bidi_pipe { fcntl($in_w, 1031, 4096) if $batch eq '--batch-check'; } - my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); + my @cmd = (qw(git), "--git-dir=$self->{git_dir}", + qw(-c core.abbrev=40 cat-file), $batch); my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) }; if ($err) { open(my $fh, '+>', undef) or fail($self, "open.err failed: $!"); -- cgit v1.2.3-24-ge0c7 From 4cefd2b05e5262f31505c7226ce761cbd4b1f308 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 19 Jan 2019 06:36:04 +0000 Subject: git: support 'ambiguous' result from --batch-check David Turner's patch to return "ambiguous" seems like a reasonable patch for future versions of git: https://public-inbox.org/git/672a6fb9e480becbfcb5df23ae37193784811b6b.camel@novalis.org/ --- lib/PublicInbox/Git.pm | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index b655921b..a0b934a3 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -179,11 +179,13 @@ sub check { local $/ = "\n"; chomp(my $line = $self->{in_c}->getline); my ($hex, $type, $size) = split(' ', $line); - return if $type eq 'missing'; - # "dead" in git.git shows "dangling 4\ndead\n", not sure why - # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/ - # so handle the oddball stuff just in case + # Future versions of git.git may show 'ambiguous', but for now, + # we must handle 'dangling' below (and maybe some other oddball + # stuff): + # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/ + return if $type eq 'missing' || $type eq 'ambiguous'; + if ($hex eq 'dangling' || $hex 
eq 'notdir' || $hex eq 'loop') { $size = $type + length("\n"); my $r = read($self->{in_c}, my $buf, $size); -- cgit v1.2.3-24-ge0c7 From 8a7653c1e4b1305b95db7e65c7ad00d2a50bbce1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Jan 2019 00:21:51 +0000 Subject: solver: hold patches in temporary directory We can avoid bumping up RLIMIT_NOFILE too much by storing patches in a temporary directory. And we can share this top-level directory with our temporary git repository. Since we no longer rely on a working-tree for git, we are free to rearrange the layout and avoid relying on both the ".git" convention and "git -C" for chdir. This may also ease porting public-inbox to older systems where git does not support "-C" for chdir. --- lib/PublicInbox/Git.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index a0b934a3..3ad08112 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -50,7 +50,7 @@ sub new { my ($class, $git_dir) = @_; my @st; $st[7] = $st[10] = 0; - # may contain {-wt} field (working-tree (File::Temp::Dir)) + # may contain {-tmp} field for File::Temp::Dir bless { git_dir => $git_dir, st => \@st }, $class } -- cgit v1.2.3-24-ge0c7