From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS24940 5.9.0.0/16 X-Spam-Status: No, score=-1.3 required=3.0 tests=AWL,BAYES_00,RCVD_IN_XBL, SPF_FAIL,SPF_HELO_FAIL,URIBL_BLOCKED shortcircuit=no autolearn=no autolearn_force=no version=3.4.0 Received: from 80x24.org (tor-relay.zwiebeltoralf.de [5.9.158.75]) by dcvr.yhbt.net (Postfix) with ESMTP id A0DAC1FCB7 for ; Sun, 15 May 2016 23:59:38 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] mbox: support /$INBOX/all.mbox.gz endpoint Date: Sun, 15 May 2016 23:59:32 +0000 Message-Id: <20160515235932.18313-3-e@80x24.org> In-Reply-To: <20160515235932.18313-1-e@80x24.org> References: <20160515235932.18313-1-e@80x24.org> List-Id: Allows easily downloading the entire archive without special tools. In any case, it's not yet advertised via HTML until we can test it better. It'll also support range queries in the future to avoid wasting bandwidth. 
--- lib/PublicInbox/Mbox.pm | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/WWW.pm | 11 +++++++++ 2 files changed, 74 insertions(+) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index efb13e5..4c4b74f 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -121,6 +121,69 @@ sub emit_mbox { $fh->close; } +sub emit_range { + my ($ctx, $range) = @_; + sub { _emit_range($_[0], $ctx, $range) }; +} + +sub _emit_range { + my ($res, $ctx, $range) = @_; + + eval { require IO::Compress::Gzip }; + return need_gzip($res) if $@; + my $query; + if ($range eq 'all') { # TODO: YYYY[-MM] + $query = ''; + } else { + $res->([404, [qw(Content-Type text/plain)], []]); + return; + } + + # http://www.iana.org/assignments/media-types/application/gzip + my $fh = $res->([200, [qw(Content-Type application/gzip)]]); + $fh = PublicInbox::MboxGz->new($fh); + my $env = $ctx->{cgi}->env; + my $srch = $ctx->{srch}; + my $git = $ctx->{git}; + my %opts = (offset => 0, asc => 1); + my $nr; + my $cb = sub { + my $res = $srch->query($query, \%opts); + my $msgs = $res->{msgs}; + $nr = scalar @$msgs; + while (defined(my $smsg = shift @$msgs)) { + my $msg = eval { + my $p = 'HEAD:'.mid2path($smsg->mid); + Email::Simple->new($git->cat_file($p)); + }; + emit_msg($ctx, $fh, $msg) if $msg; + } + + $opts{offset} += $nr; + }; + + $cb->(); # first part is free + return $fh->close if $nr == 0; + + if ($env->{'pi-httpd.async'}) { + my $io = $env->{'psgix.io'} or die "no IO"; + my $next; + $next = sub { + $cb->(); + if ($nr > 0) { + $io->write($next); + } else { + $next = undef; + $fh->close; + } + }; + $io->write($next); # Danga::Socket::write + return; + } + $cb->() while ($nr > 0); + $fh->close; +} + sub need_gzip { my $fh = $_[0]->([501, ['Content-Type' => 'text/html']]); my $title = 'gzipped mbox not available'; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 85cb234..51dc3da 100644 --- a/lib/PublicInbox/WWW.pm +++ 
b/lib/PublicInbox/WWW.pm @@ -68,6 +68,8 @@ sub call { my $path = $2; invalid_inbox($self, $ctx, $1) || serve_git($cgi, $ctx->{git}, $path); + } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) { + serve_mbox_range($self, $ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) { msg_page($self, $ctx, $1, $2, $3); @@ -430,6 +432,15 @@ sub serve_git { PublicInbox::GitHTTPBackend::serve($cgi, $git, $path); } +sub serve_mbox_range { + my ($self, $ctx, $inbox, $range) = @_; + invalid_inbox($self, $ctx, $inbox) || eval { + require PublicInbox::Mbox; + searcher($ctx); + PublicInbox::Mbox::emit_range($ctx, $range); + } +} + sub news_www { my ($self) = @_; my $nw = $self->{news_www};