about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
authorEric Wong (Contractor, The Linux Foundation) <e@80x24.org>2019-06-09 04:31:03 +0000
committerEric Wong <e@80x24.org>2019-06-09 04:32:31 +0000
commit0b3e19584c90d958a723ac2d3dec3f84f5513688 (patch)
tree6cad532198521b6fc6afe15f415cbd51a6bcb3d2 /lib/PublicInbox
parent3e0e596105198cfad0eaf3e15f69a21c6bc9ffe1 (diff)
downloadpublic-inbox-0b3e19584c90d958a723ac2d3dec3f84f5513688.tar.gz
Support on-demand generation of "/manifest.js.gz" for inboxes.
By default, this matches inboxes with URLs matching the given
request hostname.

This makes it easier to create full mirrors of several inboxes
without needing to configure static file serving.

cf. https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/WWW.pm2
-rw-r--r--lib/PublicInbox/WwwListing.pm164
2 files changed, 143 insertions, 23 deletions
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 7ea98204..614adad6 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -88,7 +88,7 @@ sub call {
         }
 
         # top-level indices and feeds
-        if ($path_info eq '/') {
+        if ($path_info eq '/' || $path_info eq '/manifest.js.gz') {
                 www_listing($self)->call($env);
         } elsif ($path_info =~ m!$INBOX_RE\z!o) {
                 invalid_inbox($ctx, $1) || r301($ctx, $1);
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 6d6d3015..690976ac 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -9,6 +9,11 @@ use warnings;
 use PublicInbox::Hval qw(ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::View;
+use bytes ();
+use HTTP::Date qw(time2str);
+require Digest::SHA;
+require File::Spec;
+{ no warnings 'once'; *try_cat = *PublicInbox::Inbox::try_cat };
 
 sub list_all ($$$) {
         my ($self, $env, $hide_key) = @_;
@@ -44,21 +49,27 @@ my %VALID = (
         404 => *list_404,
 );
 
+sub set_cb ($$$) { # map config key $k (or $default) to a %VALID callback
+        my ($pi_config, $k, $default) = @_;
+        my $v = $pi_config->{lc $k} // $default;
+        $VALID{$v} || do { # unknown value: warn, then use $default's callback
+                warn <<"";
+`$v' is not a valid value for `$k'
+$k must be one of `all', `match=domain', or `404'
+
+                $VALID{$default};
+        };
+}
+
 sub new {
         my ($class, $www) = @_;
-        my $k = 'publicinbox.wwwListing';
         my $pi_config = $www->{pi_config};
-        my $v = $pi_config->{lc($k)} // 404;
         bless {
                 pi_config => $pi_config,
                 style => $www->style("\0"),
-                list_cb => $VALID{$v} || do {
-                        warn <<"";
-`$v' is not a valid value for `$k'
-$k be one of `all', `match=domain', or `404'
-
-                        *list_404;
-                },
+                www_cb => set_cb($pi_config, 'publicInbox.wwwListing', 404),
+                manifest_cb => set_cb($pi_config, 'publicInbox.grokManifest',
+                                        'match=domain'),
         }, $class;
 }
 
@@ -76,26 +87,20 @@ sub ibx_entry {
         $tmp;
 }
 
-# not really a stand-alone PSGI app, but maybe it could be...
-sub call {
-        my ($self, $env) = @_;
-        my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
-        my $hide_key = 'www';
-        if ($env->{PATH_INFO} =~ m!/manifest\.js(?:\.gz)\z/!) {
-                $hide_key = 'manifest';
-        }
-        my $list = $self->{list_cb}->($self, $env, $hide_key);
-        my $code = 404;
+sub html ($$) {
+        my ($env, $list) = @_;
         my $title = 'public-inbox';
         my $out = '';
+        my $code = 404;
         if (@$list) {
+                $title .= ' - listing';
+                $code = 200;
+
                 # Swartzian transform since ->modified is expensive
                 @$list = sort {
                         $b->[0] <=> $a->[0]
                 } map { [ $_->modified, $_ ] } @$list;
 
-                $code = 200;
-                $title .= ' - listing';
                 my $tmp = join("\n", map { ibx_entry(@$_, $env) } @$list);
                 my $l = PublicInbox::Linkify->new;
                 $l->linkify_1($tmp);
@@ -104,7 +109,122 @@ sub call {
         $out = "<html><head><title>$title</title></head><body>" . $out;
         $out .= '<pre>'. PublicInbox::WwwStream::code_footer($env) .
                 '</pre></body></html>';
-        [ $code, $h, [ $out ] ]
+
+        my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+        [ $code, $h, [ $out ] ];
+}
+
+my $json; # encoder cached across requests; filled lazily via _json()
+sub _json () { # returns a new encoder from the first loadable JSON module
+        for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) {
+                eval "require $mod" or next;
+                # ->ascii encodes non-ASCII to "\uXXXX"
+                return $mod->new->ascii(1);
+        }
+        die 'no JSON module found (tried JSON::MaybeXS, JSON, JSON::PP)';
+}
+
+sub fingerprint ($) { # SHA-1 hex digest of `git show-ref' output for $git
+        my ($git) = @_;
+        my $rd = $git->popen('show-ref');
+        die "popen($git->{git_dir} show-ref) failed: $!" unless $rd;
+
+        my $sha1 = Digest::SHA->new(1); # algorithm "1" is SHA-1
+        my $chunk;
+        $sha1->add($chunk) while read($rd, $chunk, 65536);
+
+        close $rd; # presumably reaps git and sets $?; popen is opaque here
+        return if $?; # empty, uninitialized git repo
+        $sha1->hexdigest;
+}
+
+sub manifest_add ($$;$) { # add one git repo of $ibx to the $manifest hashref
+        my ($manifest, $ibx, $epoch) = @_;
+        my $url_path = "/$ibx->{name}";
+        my $git_dir = $ibx->{mainrepo};
+        if (defined $epoch) { # epoch given: repo lives in git/$epoch.git
+                $git_dir .= "/git/$epoch.git";
+                $url_path .= "/$epoch";
+        }
+        return unless -d $git_dir;
+        my $git = PublicInbox::Git->new($git_dir);
+        my $fingerprint = fingerprint($git) or return; # no empty repos
+
+        chomp(my $owner = $git->qx('config', 'gitweb.owner'));
+        chomp(my $desc = try_cat("$git_dir/description"));
+        $owner = undef if $owner eq '';
+        $desc = 'Unnamed repository' if $desc eq '';
+
+        my $reference;
+        chomp(my $alt = try_cat("$git_dir/objects/info/alternates"));
+        if ($alt) {
+                # n.b.: GitPython doesn't seem to handle comments or C-quoted
+                # strings like native git does; and we don't for now, either.
+                my @alt = split(/\n+/, $alt);
+
+                # grokmirror only supports 1 alternate for "reference"; skip if more
+                if (scalar(@alt) == 1) {
+                        my $objdir = "$git_dir/objects";
+                        $reference = File::Spec->rel2abs($alt[0], $objdir);
+                        $reference =~ s!/[^/]+/?\z!!; # dirname: drop last component
+                }
+        }
+        $manifest->{-abs2urlpath}->{$git_dir} = $url_path; # git dir => URL path
+        my $modified = $git->modified;
+        if ($modified > $manifest->{-mtime}) { # -mtime keeps the newest value seen
+                $manifest->{-mtime} = $modified;
+        }
+        $manifest->{$url_path} = {
+                owner => $owner,
+                reference => $reference,
+                description => $desc,
+                modified => $modified,
+                fingerprint => $fingerprint,
+        };
+}
+
+# manifest.js.gz: gzipped JSON of every git repo in $list, as a PSGI response
+sub js ($$) {
+        my ($env, $list) = @_;
+        eval { require IO::Compress::Gzip } or return [ 404, [], [] ];
+
+        my $manifest = { -abs2urlpath => {}, -mtime => 0 };
+        for my $ibx (@$list) {
+                if (defined(my $max = $ibx->max_git_part)) {
+                        for my $epoch (0..$max) {
+                                manifest_add($manifest, $ibx, $epoch);
+                        }
+                } else {
+                        manifest_add($manifest, $ibx);
+                }
+        }
+        my $abs2urlpath = delete $manifest->{-abs2urlpath};
+        my $mtime = delete $manifest->{-mtime}; # keep bookkeeping out of JSON
+        for my $repo (values %$manifest) { # absolute "reference" => URL path
+                defined(my $abs = $repo->{reference}) or next;
+                $repo->{reference} = $abs2urlpath->{$abs};
+        }
+        my $out;
+        IO::Compress::Gzip::gzip(\(($json ||= _json())->encode($manifest)) =>
+                                 \$out) or return [ 500, [], [] ]; # gzip failed
+        $manifest = undef;
+        [ 200, [ qw(Content-Type application/gzip),
+                 'Last-Modified', time2str($mtime),
+                 'Content-Length', bytes::length($out) ], [ $out ] ];
+}
+
+# not really a stand-alone PSGI app, but maybe it could be...
+sub call {
+        my ($self, $env) = @_;
+
+        unless ($env->{PATH_INFO} eq '/manifest.js.gz') { # i.e. "/"
+                my $inboxes = $self->{www_cb}->($self, $env, 'www');
+                return html($env, $inboxes);
+        }
+
+        # grokmirror uses relative paths, so it's domain-dependent
+        my $inboxes = $self->{manifest_cb}->($self, $env, 'manifest');
+        js($env, $inboxes);
+}
 
 1;