From 3e0e596105198cfad0eaf3e15f69a21c6bc9ffe1 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 04:31:02 +0000 Subject: wwwlisting: allow hiding entries from manifest Since we already have a mechanism for hiding repositories from the WWW listing, we might as well support another one for hiding repositories from the upcoming manifest.js.gz generation. --- lib/PublicInbox/WwwListing.pm | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index e1473b3d..6d6d3015 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -10,25 +10,27 @@ use PublicInbox::Hval qw(ascii_html); use PublicInbox::Linkify; use PublicInbox::View; -sub list_all ($$) { - my ($self, undef) = @_; +sub list_all ($$$) { + my ($self, $env, $hide_key) = @_; my @list; $self->{pi_config}->each_inbox(sub { my ($ibx) = @_; - push @list, $ibx unless $ibx->{-hide}->{www}; + push @list, $ibx unless $ibx->{-hide}->{$hide_key}; }); \@list; } -sub list_match_domain ($$) { - my ($self, $env) = @_; +sub list_match_domain ($$$) { + my ($self, $env, $hide_key) = @_; my @list; my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME}; $host =~ s/:[0-9]+\z//; my $re = qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i; $self->{pi_config}->each_inbox(sub { my ($ibx) = @_; - push @list, $ibx if !$ibx->{-hide}->{www} && $ibx->{url} =~ $re; + if (!$ibx->{-hide}->{$hide_key} && $ibx->{url} =~ $re) { + push @list, $ibx; + } }); \@list; } @@ -78,7 +80,11 @@ sub ibx_entry { sub call { my ($self, $env) = @_; my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; - my $list = $self->{list_cb}->($self, $env); + my $hide_key = 'www'; + if ($env->{PATH_INFO} =~ m!/manifest\.js(?:\.gz)\z/!) 
{ + $hide_key = 'manifest'; + } + my $list = $self->{list_cb}->($self, $env, $hide_key); my $code = 404; my $title = 'public-inbox'; my $out = ''; -- cgit v1.2.3-24-ge0c7 From 0b3e19584c90d958a723ac2d3dec3f84f5513688 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 04:31:03 +0000 Subject: wwwlisting: generate grokmirror-compatible manifest.js.gz Support on-demand generation of "/manifest.js.gz" for inboxes. By default, this matches inboxes with URLs matching the given request hostname. This makes it easier to create full mirrors of several inboxes without needing to configure static file serving. cf. https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git --- lib/PublicInbox/WWW.pm | 2 +- lib/PublicInbox/WwwListing.pm | 164 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 143 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 7ea98204..614adad6 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -88,7 +88,7 @@ sub call { } # top-level indices and feeds - if ($path_info eq '/') { + if ($path_info eq '/' || $path_info eq '/manifest.js.gz') { www_listing($self)->call($env); } elsif ($path_info =~ m!$INBOX_RE\z!o) { invalid_inbox($ctx, $1) || r301($ctx, $1); diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index 6d6d3015..690976ac 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -9,6 +9,11 @@ use warnings; use PublicInbox::Hval qw(ascii_html); use PublicInbox::Linkify; use PublicInbox::View; +use bytes (); +use HTTP::Date qw(time2str); +require Digest::SHA; +require File::Spec; +{ no warnings 'once'; *try_cat = *PublicInbox::Inbox::try_cat }; sub list_all ($$$) { my ($self, $env, $hide_key) = @_; @@ -44,21 +49,27 @@ my %VALID = ( 404 => *list_404, ); +sub set_cb ($$$) { + my ($pi_config, $k, $default) = @_; + my $v = $pi_config->{lc $k} // $default; + 
$VALID{$v} || do { + warn <<""; +`$v' is not a valid value for `$k' +$k be one of `all', `match=domain', or `404' + + $VALID{$default}; + }; +} + sub new { my ($class, $www) = @_; - my $k = 'publicinbox.wwwListing'; my $pi_config = $www->{pi_config}; - my $v = $pi_config->{lc($k)} // 404; bless { pi_config => $pi_config, style => $www->style("\0"), - list_cb => $VALID{$v} || do { - warn <<""; -`$v' is not a valid value for `$k' -$k be one of `all', `match=domain', or `404' - - *list_404; - }, + www_cb => set_cb($pi_config, 'publicInbox.wwwListing', 404), + manifest_cb => set_cb($pi_config, 'publicInbox.grokManifest', + 'match=domain'), }, $class; } @@ -76,26 +87,20 @@ sub ibx_entry { $tmp; } -# not really a stand-alone PSGI app, but maybe it could be... -sub call { - my ($self, $env) = @_; - my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; - my $hide_key = 'www'; - if ($env->{PATH_INFO} =~ m!/manifest\.js(?:\.gz)\z/!) { - $hide_key = 'manifest'; - } - my $list = $self->{list_cb}->($self, $env, $hide_key); - my $code = 404; +sub html ($$) { + my ($env, $list) = @_; my $title = 'public-inbox'; my $out = ''; + my $code = 404; if (@$list) { + $title .= ' - listing'; + $code = 200; + # Swartzian transform since ->modified is expensive @$list = sort { $b->[0] <=> $a->[0] } map { [ $_->modified, $_ ] } @$list; - $code = 200; - $title .= ' - listing'; my $tmp = join("\n", map { ibx_entry(@$_, $env) } @$list); my $l = PublicInbox::Linkify->new; $l->linkify_1($tmp); @@ -104,7 +109,122 @@ sub call { $out = "$title" . $out; $out .= '
'. PublicInbox::WwwStream::code_footer($env) .
 		'
'; - [ $code, $h, [ $out ] ] + + my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; + [ $code, $h, [ $out ] ]; +} + +my $json; +sub _json () { + for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) { + eval "require $mod" or next; + # ->ascii encodes non-ASCII to "\uXXXX" + return $mod->new->ascii(1); + } + die; +} + +sub fingerprint ($) { + my ($git) = @_; + my $fh = $git->popen('show-ref') or + die "popen($git->{git_dir} show-ref) failed: $!"; + + my $dig = Digest::SHA->new(1); + while (read($fh, my $buf, 65536)) { + $dig->add($buf); + } + close $fh; + return if $?; # empty, uninitialized git repo + $dig->hexdigest; +} + +sub manifest_add ($$;$) { + my ($manifest, $ibx, $epoch) = @_; + my $url_path = "/$ibx->{name}"; + my $git_dir = $ibx->{mainrepo}; + if (defined $epoch) { + $git_dir .= "/git/$epoch.git"; + $url_path .= "/$epoch"; + } + return unless -d $git_dir; + my $git = PublicInbox::Git->new($git_dir); + my $fingerprint = fingerprint($git) or return; # no empty repos + + chomp(my $owner = $git->qx('config', 'gitweb.owner')); + chomp(my $desc = try_cat("$git_dir/description")); + $owner = undef if $owner eq ''; + $desc = 'Unnamed repository' if $desc eq ''; + + my $reference; + chomp(my $alt = try_cat("$git_dir/objects/info/alternates")); + if ($alt) { + # n.b.: GitPython doesn't seem to handle comments or C-quoted + # strings like native git does; and we don't for now, either. 
+ my @alt = split(/\n+/, $alt); + + # grokmirror only supports 1 alternate for "reference", + if (scalar(@alt) == 1) { + my $objdir = "$git_dir/objects"; + $reference = File::Spec->rel2abs($alt[0], $objdir); + $reference =~ s!/[^/]+/?\z!!; # basename + } + } + $manifest->{-abs2urlpath}->{$git_dir} = $url_path; + my $modified = $git->modified; + if ($modified > $manifest->{-mtime}) { + $manifest->{-mtime} = $modified; + } + $manifest->{$url_path} = { + owner => $owner, + reference => $reference, + description => $desc, + modified => $modified, + fingerprint => $fingerprint, + }; +} + +# manifest.js.gz +sub js ($$) { + my ($env, $list) = @_; + eval { require IO::Compress::Gzip } or return [ 404, [], [] ]; + + my $manifest = { -abs2urlpath => {}, -mtime => 0 }; + for my $ibx (@$list) { + if (defined(my $max = $ibx->max_git_part)) { + for my $epoch (0..$max) { + manifest_add($manifest, $ibx, $epoch); + } + } else { + manifest_add($manifest, $ibx); + } + } + my $abs2urlpath = delete $manifest->{-abs2urlpath}; + my $mtime = delete $manifest->{-mtime}; + while (my ($url_path, $repo) = each %$manifest) { + defined(my $abs = $repo->{reference}) or next; + $repo->{reference} = $abs2urlpath->{$abs}; + } + my $out; + IO::Compress::Gzip::gzip(\(($json ||= _json())->encode($manifest)) => + \$out); + $manifest = undef; + [ 200, [ qw(Content-Type application/gzip), + 'Last-Modified', time2str($mtime), + 'Content-Length', bytes::length($out) ], [ $out ] ]; +} + +# not really a stand-alone PSGI app, but maybe it could be... 
+sub call { + my ($self, $env) = @_; + + if ($env->{PATH_INFO} eq '/manifest.js.gz') { + # grokmirror uses relative paths, so it's domain-dependent + my $list = $self->{manifest_cb}->($self, $env, 'manifest'); + js($env, $list); + } else { # / + my $list = $self->{www_cb}->($self, $env, 'www'); + html($env, $list); + } } 1; -- cgit v1.2.3-24-ge0c7 From 279a47f3f64fc7a414247922b870e58a0b334b0f Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 04:31:04 +0000 Subject: www: wire up /$INBOX/manifest.js.gz, too I can imagine myself just wanting to clone a single v2 inbox and all its epochs without thinking about include/exclude rules in a grokmirror config file. --- lib/PublicInbox/WWW.pm | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 614adad6..a5466980 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -126,6 +126,8 @@ sub call { get_text($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) { get_css($ctx, $1, $2); + } elsif ($path_info =~ m!$INBOX_RE/manifest\.js\.gz\z!o) { + get_inbox_manifest($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) { get_vcs_object($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/ @@ -490,6 +492,15 @@ sub www_listing { } } +# GET $INBOX/manifest.js.gz +sub get_inbox_manifest ($$$) { + my ($ctx, $inbox, $key) = @_; + my $r404 = invalid_inbox($ctx, $inbox); + return $r404 if $r404; + require PublicInbox::WwwListing; + PublicInbox::WwwListing::js($ctx->{env}, [$ctx->{-inbox}]); +} + sub get_attach { my ($ctx, $idx, $fn) = @_; require PublicInbox::WwwAttach; -- cgit v1.2.3-24-ge0c7 From 0886c264b24b8bc7626e5a7eb59598b0229f066d Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 04:31:05 +0000 Subject: www: support $INBOX/git/$EPOCH.git for v2 cloning And use it in manifest.js. 
To ease maintaining mirrors with grokmirror(1), we can accept a "git/" directory prefix before the epoch, and ".git" suffix after the epoch number. We maintain compatibility with "$INBOX/$EPOCH" cloning, of course, and it's still easier-to-type on the command-line. --- lib/PublicInbox/WWW.pm | 4 ++-- lib/PublicInbox/WwwListing.pm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index a5466980..e4682636 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -74,7 +74,7 @@ sub call { my $method = $env->{REQUEST_METHOD}; if ($method eq 'POST') { - if ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)? + if ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)? (git-upload-pack)\z!x) { my ($part, $path) = ($2, $3); return invalid_inbox($ctx, $1) || @@ -98,7 +98,7 @@ sub call { invalid_inbox($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) { invalid_inbox($ctx, $1) || get_new($ctx); - } elsif ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)? + } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)? 
($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my ($part, $path) = ($2, $3); invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path); diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index 690976ac..e2724cc4 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -144,7 +144,7 @@ sub manifest_add ($$;$) { my $git_dir = $ibx->{mainrepo}; if (defined $epoch) { $git_dir .= "/git/$epoch.git"; - $url_path .= "/$epoch"; + $url_path .= "/git/$epoch.git"; } return unless -d $git_dir; my $git = PublicInbox::Git->new($git_dir); -- cgit v1.2.3-24-ge0c7 From d3c94cf92e8a4693aa691f3464c94c00be543cfc Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sun, 9 Jun 2019 00:53:29 +0000 Subject: git: ensure ->modified returns an integer We don't want to serialize timestamps as strings to JSON. I only noticed this bug on a 32-bit system. --- lib/PublicInbox/Git.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 68445b3c..82510b99 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -320,7 +320,7 @@ sub modified ($) { chomp $oid; my $buf = cat_file($self, $oid) or next; $$buf =~ /^committer .*?> ([0-9]+) [\+\-]?[0-9]+/sm or next; - my $cmt_time = $1; + my $cmt_time = $1 + 0; $modified = $cmt_time if $cmt_time > $modified; } $modified || time; -- cgit v1.2.3-24-ge0c7