# Recovered from the patch that creates lib/PublicInbox/WwwAltId.pm:
#
#   commit 6512b1245ebc6fe30bb32227c0ef8f912d4988ab
#   From: Eric Wong
#   Date: Sat, 21 Mar 2020 02:03:54 +0000
#   Subject: www: add endpoint to retrieve altid dumps
#
#   This ensures all our indexed data, including data from altid
#   searches (e.g. "gmane:$ARTNUM") is retrievable.
#
#   It uses a "POST" request to avoid wasting cycles when invoked
#   by crawlers, since it could potentially be several megabytes
#   of data not indexable by search engines.
#
# NOTE(review): the rendered patch this file was recovered from had lost
# its heredoc openers (and possibly markup inside the heredoc bodies);
# confirm the message bodies below against the upstream repository.

# Copyright (C) 2020 all contributors
# License: AGPL-3.0+

# dumps using the ".dump" command of sqlite3(1)
package PublicInbox::WwwAltId;
use strict;
use warnings;
use PublicInbox::Qspawn;
use PublicInbox::WwwStream;
use PublicInbox::AltId;
use PublicInbox::Spawn qw(which);

# Path to the sqlite3(1) binary.  May be preset via the SQLITE3
# environment variable; otherwise resolved lazily in sqldump().
our $sqlite3 = $ENV{SQLITE3};

# returns prefix => pathname mapping
# (pathname is NOT public, but prefix is used for Xapian queries)
#
# $ibx is the inbox object; its {altid} field (if set) is an array ref
# of altid specs, each handed to PublicInbox::AltId->new.
# Returns a hash ref, empty when the inbox has no altid configured.
sub altid_map ($) {
	my ($ibx) = @_;
	my $altid = $ibx->{altid} or return {};
	my %h = map {; # leading ';' forces a block, not a hash constructor
		my $x = PublicInbox::AltId->new($ibx, $_);
		"$x->{prefix}" => $x->{filename}
	} @$altid;
	\%h;
}

# Builds the 501 response sent when no sqlite3(1) binary can be found.
# $_[0] is the request context ($ctx).
# Goes through PublicInbox::WwwStream, which is the response helper this
# module actually loads.
sub sqlite3_missing ($) {
	PublicInbox::WwwStream::oneshot($_[0], 501, \<<EOF);
sqlite3 not available

The administrator needs to install the sqlite3(1) binary
to support gzipped sqlite3 dumps.

EOF
}

# Callback handed to Qspawn->psgi_return by sqldump().
#   $r    - undef on read error, 0 on EOF before any output arrived,
#           otherwise treated as success
#   $bref - unused here; kept to document the callback's argument order
#   $ctx  - request context; {altid_pfx} was stored by sqldump()
# Returns either an error response or a [ status, headers ] pair for
# the gzipped dump.
sub check_output {
	my ($r, $bref, $ctx) = @_;
	return PublicInbox::WwwStream::oneshot($ctx, 500) if !defined($r);
	if ($r == 0) {
		# prefer the PSGI error stream, fall back to STDERR
		my $err = eval { $ctx->{env}->{'psgi.errors'} } // \*STDERR;
		$err->print("unexpected EOF from sqlite3\n");
		return PublicInbox::WwwStream::oneshot($ctx, 501);
	}
	[200, [ qw(Content-Type application/gzip), 'Content-Disposition',
		"inline; filename=$ctx->{altid_pfx}.sql.gz" ] ]
}

# POST $INBOX/$prefix.sql.gz
# we use the sqlite3(1) binary here since that's where the ".dump"
# command is implemented, not (AFAIK) in the libsqlite3 library
# and thus not usable from DBD::SQLite.
#
#   $ctx       - request context ({-inbox} and {env} are read here)
#   $altid_pfx - altid prefix taken from the request
# Returns a 404 response for an unknown prefix, a 501 response when gzip
# support or sqlite3(1) is unavailable, otherwise whatever
# Qspawn->psgi_return returns for the streamed dump.
sub sqldump ($$) {
	my ($ctx, $altid_pfx) = @_;
	my $ibx = $ctx->{-inbox};
	# the prefix => pathname map is cached on the inbox object
	my $altid_map = $ibx->{-altid_map} //= altid_map($ibx);
	my $fn = $altid_map->{$altid_pfx};
	unless (defined $fn) {
		# NOTE(review): $altid_pfx comes from the request and is
		# echoed back verbatim; confirm the caller restricts it.
		return PublicInbox::WwwStream::oneshot($ctx, 404, \<<EOF);
`$altid_pfx' is not a valid altid for this inbox
EOF
	}

	eval { require PublicInbox::GzipFilter } or
		return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
gzip output not available

The administrator needs to install the Compress::Raw::Zlib Perl module
to support gzipped sqlite3 dumps.
EOF
	$sqlite3 //= which('sqlite3');
	return sqlite3_missing($ctx) if !defined($sqlite3);

	# setup stdin, POSIX requires writes <= 512 bytes to succeed so
	# we can close the pipe right away.
	my $cmd = ".dump\n";
	pipe(my ($r, $w)) or die "pipe: $!";
	my $n = syswrite($w, $cmd);
	defined($n) && $n == length($cmd) or die "write: $!";
	close($w) or die "close: $!";

	# TODO: use -readonly if available with newer sqlite3(1)
	my $qsp = PublicInbox::Qspawn->new([$sqlite3, $fn], undef, { 0 => $r });
	my $env = $ctx->{env};
	$ctx->{altid_pfx} = $altid_pfx; # read back by check_output()
	$env->{'qspawn.filter'} = PublicInbox::GzipFilter->new;
	$qsp->psgi_return($env, undef, \&check_output, $ctx);
}

1;