From 1269631ae596b910b7c26890c3efa9f85301a89b Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 15 May 2016 23:30:06 +0000
Subject: mbox: support /$INBOX/all.mbox.gz endpoint

Allows easily downloading the entire archive without
special tools. In any case, it's not yet advertised
via HTML until we can test it better.

It'll also support range queries in the future to avoid
wasting bandwidth.
---
 lib/PublicInbox/Mbox.pm | 77 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WWW.pm  | 16 ++++++++++
 2 files changed, 93 insertions(+)

(limited to 'lib')

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index efb13e5b..4c4b74fb 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -121,6 +121,83 @@ sub emit_mbox {
 	$fh->close;
 }
 
+# Returns the PSGI streaming callback for /$INBOX/$range.mbox.gz;
+# the server is expected to call it with its responder as $_[0].
+sub emit_range {
+	my ($ctx, $range) = @_;
+	sub { _emit_range($_[0], $ctx, $range) };
+}
+
+# Streams the messages selected by $range as a gzipped mbox.
+#   $res   - responder; called with [status, headers] to get a writer
+#   $ctx   - request context; {cgi}, {srch} and {git} are read here
+#   $range - only 'all' is handled so far, anything else gets a 404
+# Query results are fetched and emitted one batch per $cb call.
+sub _emit_range {
+	my ($res, $ctx, $range) = @_;
+
+	eval { require IO::Compress::Gzip };
+	return need_gzip($res) if $@;
+	my $query;
+	if ($range eq 'all') { # TODO: YYYY[-MM]
+		$query = '';
+	} else {
+		$res->([404, [qw(Content-Type text/plain)], []]);
+		return;
+	}
+
+	# http://www.iana.org/assignments/media-types/application/gzip
+	my $fh = $res->([200, [qw(Content-Type application/gzip)]]);
+	$fh = PublicInbox::MboxGz->new($fh);
+	my $env = $ctx->{cgi}->env;
+	my $srch = $ctx->{srch};
+	my $git = $ctx->{git};
+	my %opts = (offset => 0, asc => 1);
+	my $nr;
+
+	# emit one batch of results and advance the offset past it;
+	# $nr is left holding the batch size (0 means nothing is left)
+	my $cb = sub {
+		# not named $res: that would shadow the PSGI responder
+		my $results = $srch->query($query, \%opts);
+		my $msgs = $results->{msgs};
+		$nr = scalar @$msgs;
+		while (defined(my $smsg = shift @$msgs)) {
+			# best effort: a message that fails to load is skipped
+			my $msg = eval {
+				my $p = 'HEAD:'.mid2path($smsg->mid);
+				Email::Simple->new($git->cat_file($p));
+			};
+			emit_msg($ctx, $fh, $msg) if $msg;
+		}
+
+		$opts{offset} += $nr;
+	};
+
+	$cb->(); # first part is free
+	return $fh->close if $nr == 0;
+
+	if ($env->{'pi-httpd.async'}) {
+		my $io = $env->{'psgix.io'} or die "no IO";
+		my $next;
+		# $next keeps re-queueing itself until a batch comes back
+		# empty; clearing it then drops the closure's self-reference
+		$next = sub {
+			$cb->();
+			if ($nr > 0) {
+				$io->write($next);
+			} else {
+				$next = undef;
+				$fh->close;
+			}
+		};
+		$io->write($next); # Danga::Socket::write
+		return;
+	}
+	$cb->() while ($nr > 0);
+	$fh->close;
+}
+
 sub need_gzip {
 	my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]);
 	my $title = 'gzipped mbox not available';
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 85cb234b..51dc3daa 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -68,6 +68,8 @@ sub call {
 		my $path = $2;
 		invalid_inbox($self, $ctx, $1) ||
 			serve_git($cgi, $ctx->{git}, $path);
+	} elsif ($path_info =~ m!$INBOX_RE/([\w-]+)\.mbox\.gz\z!o) {
+		serve_mbox_range($self, $ctx, $1, $2);
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
 		msg_page($self, $ctx, $1, $2, $3);
 
@@ -430,6 +432,20 @@ sub serve_git {
 	PublicInbox::GitHTTPBackend::serve($cgi, $git, $path);
 }
 
+# Handles /$INBOX/$range.mbox.gz.  A true value from invalid_inbox()
+# is returned as-is; otherwise the result is the streaming callback
+# built by PublicInbox::Mbox::emit_range.
+# NOTE(review): if anything inside the eval dies this returns undef,
+# and the result of searcher() is never checked - confirm both are OK.
+sub serve_mbox_range {
+	my ($self, $ctx, $inbox, $range) = @_;
+	invalid_inbox($self, $ctx, $inbox) || eval {
+		require PublicInbox::Mbox;
+		searcher($ctx);
+		PublicInbox::Mbox::emit_range($ctx, $range);
+	}
+}
+
 sub news_www {
 	my ($self) = @_;
 	my $nw = $self->{news_www};
-- 
cgit v1.2.3-24-ge0c7