* [PATCH] wwwstream::oneshot => html_oneshot
@ 2020-03-30 5:18 4% Eric Wong
0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2020-03-30 5:18 UTC (permalink / raw)
To: meta
Also use Exporter to make our lives easier, since WwwAltId was
calling a non-existent PublicInbox::WwwResponse namespace in its
error paths, which is neither noticed by `perl -c' nor exercised
by tests on normal systems.
Fixes: 6512b1245ebc6fe3 ("www: add endpoint to retrieve altid dumps")
---
lib/PublicInbox/ExtMsg.pm | 6 +++---
lib/PublicInbox/Mbox.pm | 3 ++-
lib/PublicInbox/ViewVCS.pm | 4 ++--
lib/PublicInbox/WwwAltId.pm | 16 ++++++++--------
lib/PublicInbox/WwwStream.pm | 4 +++-
5 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index a14eddf6..1d17c2ce 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -9,7 +9,7 @@ package PublicInbox::ExtMsg;
use strict;
use warnings;
use PublicInbox::Hval qw(ascii_html prurl mid_href);
-use PublicInbox::WwwStream;
+use PublicInbox::WwwStream qw(html_oneshot);
use PublicInbox::Smsg;
our $MIN_PARTIAL_LEN = 16;
@@ -159,7 +159,7 @@ sub ext_msg {
$ctx->{-html_tip} = $s .= '</pre>';
$ctx->{-title_html} = $title;
$ctx->{-upfx} = '../';
- PublicInbox::WwwStream::oneshot($ctx, $code);
+ html_oneshot($ctx, $code);
}
sub ext_urls {
@@ -197,7 +197,7 @@ sub exact {
qq(<a\nhref="$u$href/">$u$html/</a>\n)
} @$found),
$ext_urls, '</pre>');
- PublicInbox::WwwStream::oneshot($ctx, $code);
+ html_oneshot($ctx, $code);
}
1;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 46964bbb..3013dc91 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -13,6 +13,7 @@ use warnings;
use PublicInbox::MID qw/mid_escape/;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Smsg;
+use PublicInbox::WwwStream qw(html_oneshot);
use Email::Simple;
use Email::MIME::Encode;
@@ -236,7 +237,7 @@ sub mbox_all {
}
sub need_gzip {
- PublicInbox::WwwStream::oneshot($_[0], 501, \<<EOF);
+ html_oneshot($_[0], 501, \<<EOF);
<pre>gzipped mbox not available
The administrator needs to install the Compress::Raw::Zlib Perl module
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 6714e67c..069937c1 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -18,7 +18,7 @@ use strict;
use warnings;
use bytes (); # only for bytes::length
use PublicInbox::SolverGit;
-use PublicInbox::WwwStream;
+use PublicInbox::WwwStream qw(html_oneshot);
use PublicInbox::Linkify;
use PublicInbox::Tmpfile;
use PublicInbox::Hval qw(ascii_html to_filename);
@@ -35,7 +35,7 @@ sub html_page ($$$) {
my ($ctx, $code, $strref) = @_;
my $wcb = delete $ctx->{-wcb};
$ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
- my $res = PublicInbox::WwwStream::oneshot($ctx, $code, $strref);
+ my $res = html_oneshot($ctx, $code, $strref);
$wcb ? $wcb->($res) : $res;
}
diff --git a/lib/PublicInbox/WwwAltId.pm b/lib/PublicInbox/WwwAltId.pm
index 263e884a..630ed686 100644
--- a/lib/PublicInbox/WwwAltId.pm
+++ b/lib/PublicInbox/WwwAltId.pm
@@ -5,13 +5,13 @@
package PublicInbox::WwwAltId;
use strict;
use PublicInbox::Qspawn;
-use PublicInbox::WwwStream;
+use PublicInbox::WwwStream qw(html_oneshot);
use PublicInbox::AltId;
use PublicInbox::Spawn qw(which);
our $sqlite3 = $ENV{SQLITE3};
sub sqlite3_missing ($) {
- PublicInbox::WwwResponse::oneshot($_[0], 501, \<<EOF);
+ html_oneshot($_[0], 501, \<<EOF);
<pre>sqlite3 not available
The administrator needs to install the sqlite3(1) binary
@@ -22,11 +22,11 @@ EOF
sub check_output {
my ($r, $bref, $ctx) = @_;
- return PublicInbox::WwwResponse::oneshot($ctx, 500) if !defined($r);
+ return html_oneshot($ctx, 500) if !defined($r);
if ($r == 0) {
my $err = eval { $ctx->{env}->{'psgi.errors'} } // \*STDERR;
$err->print("unexpected EOF from sqlite3\n");
- return PublicInbox::WwwResponse::oneshot($ctx, 501);
+ return html_oneshot($ctx, 501);
}
[200, [ qw(Content-Type application/gzip), 'Content-Disposition',
"inline; filename=$ctx->{altid_pfx}.sql.gz" ] ]
@@ -43,14 +43,14 @@ sub sqldump ($$) {
my $altid_map = $ibx->altid_map;
my $fn = $altid_map->{$altid_pfx};
unless (defined $fn) {
- return PublicInbox::WwwStream::oneshot($ctx, 404, \<<EOF);
+ return html_oneshot($ctx, 404, \<<EOF);
<pre>`$altid_pfx' is not a valid altid for this inbox</pre>
EOF
}
if ($env->{REQUEST_METHOD} ne 'POST') {
my $url = $ibx->base_url($ctx->{env}) . "$altid_pfx.sql.gz";
- return PublicInbox::WwwStream::oneshot($ctx, 405, \<<EOF);
+ return html_oneshot($ctx, 405, \<<EOF);
<pre>A POST request required to retrieve $altid_pfx.sql.gz
curl -XPOST -O $url
@@ -65,7 +65,7 @@ EOF
}
eval { require PublicInbox::GzipFilter } or
- return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+ return html_oneshot($ctx, 501, \<<EOF);
<pre>gzip output not available
The administrator needs to install the Compress::Raw::Zlib Perl module
@@ -73,7 +73,7 @@ to support gzipped sqlite3 dumps.</pre>
EOF
$sqlite3 //= which('sqlite3');
if (!defined($sqlite3)) {
- return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+ return html_oneshot($ctx, 501, \<<EOF);
<pre>sqlite3 not available
The administrator needs to install the sqlite3(1) binary
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 985e0262..915a71ba 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -9,6 +9,8 @@
package PublicInbox::WwwStream;
use strict;
use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(html_oneshot);
use bytes (); # length
use PublicInbox::Hval qw(ascii_html prurl);
our $TOR_URL = 'https://www.torproject.org/';
@@ -170,7 +172,7 @@ sub getline {
delete $self->{cb} ? _html_end($self) : undef;
}
-sub oneshot {
+sub html_oneshot ($$;$) {
my ($ctx, $code, $sref) = @_;
my $self = bless {
ctx => $ctx,
^ permalink raw reply related [relevance 4%]
* [PATCH 11/11] www: add endpoint to retrieve altid dumps
2020-03-21 2:03 7% [PATCH 00/11] www: export SQLite altid dumps Eric Wong
@ 2020-03-21 2:03 4% ` Eric Wong
0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2020-03-21 2:03 UTC (permalink / raw)
To: meta
This ensures all our indexed data, including data from altid
searches (e.g. "gmane:$ARTNUM"), is retrievable.
It uses a "POST" request to avoid wasting cycles when invoked by
crawlers, since it could potentially be several megabytes of
data not indexable by search engines.
---
MANIFEST | 2 +
lib/PublicInbox/AltId.pm | 1 +
lib/PublicInbox/WWW.pm | 14 +++++-
lib/PublicInbox/WwwAltId.pm | 94 +++++++++++++++++++++++++++++++++++++
t/www_altid.t | 83 ++++++++++++++++++++++++++++++++
5 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 lib/PublicInbox/WwwAltId.pm
create mode 100644 t/www_altid.t
diff --git a/MANIFEST b/MANIFEST
index be1c4ab5..84872561 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -168,6 +168,7 @@ lib/PublicInbox/ViewVCS.pm
lib/PublicInbox/WWW.pm
lib/PublicInbox/WWW.pod
lib/PublicInbox/WatchMaildir.pm
+lib/PublicInbox/WwwAltId.pm
lib/PublicInbox/WwwAtomStream.pm
lib/PublicInbox/WwwAttach.pm
lib/PublicInbox/WwwHighlight.pm
@@ -300,6 +301,7 @@ t/view.t
t/watch_filter_rubylang.t
t/watch_maildir.t
t/watch_maildir_v2.t
+t/www_altid.t
t/www_listing.t
t/www_static.t
t/x-unknown-alpine.eml
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 3be6c73c..6d16242a 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -39,6 +39,7 @@ sub new {
bless {
filename => $f,
writable => $writable,
+ prefix => $prefix,
xprefix => 'X'.uc($prefix),
}, $class;
}
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2434f2f5..5017f572 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -65,6 +65,8 @@ sub call {
my ($epoch, $path) = ($2, $3);
return invalid_inbox($ctx, $1) ||
serve_git($ctx, $epoch, $path);
+ } elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
+ return get_altid_dump($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/!o) {
return invalid_inbox($ctx, $1) || mbox_results($ctx);
}
@@ -150,8 +152,8 @@ sub preload {
require PublicInbox::Search;
PublicInbox::Search::load_xapian();
};
- foreach (qw(PublicInbox::SearchView PublicInbox::MboxGz)) {
- eval "require $_;";
+ for (qw(SearchView MboxGz WwwAltId)) {
+ eval "require PublicInbox::$_;";
}
if (ref($self)) {
my $pi_config = $self->{pi_config};
@@ -301,6 +303,14 @@ sub get_vcs_object ($$$;$) {
PublicInbox::ViewVCS::show($ctx, $oid, $filename);
}
+sub get_altid_dump {
+ my ($ctx, $inbox, $altid_pfx) =@_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+ eval { require PublicInbox::WwwAltId } or return need($ctx, 'sqlite3');
+ PublicInbox::WwwAltId::sqldump($ctx, $altid_pfx);
+}
+
sub need {
my ($ctx, $extra) = @_;
my $msg = <<EOF;
diff --git a/lib/PublicInbox/WwwAltId.pm b/lib/PublicInbox/WwwAltId.pm
new file mode 100644
index 00000000..34641a92
--- /dev/null
+++ b/lib/PublicInbox/WwwAltId.pm
@@ -0,0 +1,94 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# dumps using the ".dump" command of sqlite3(1)
+package PublicInbox::WwwAltId;
+use strict;
+use PublicInbox::Qspawn;
+use PublicInbox::WwwStream;
+use PublicInbox::AltId;
+use PublicInbox::Spawn qw(which);
+our $sqlite3 = $ENV{SQLITE3};
+
+# returns prefix => pathname mapping
+# (pathname is NOT public, but prefix is used for Xapian queries)
+sub altid_map ($) {
+ my ($ibx) = @_;
+ my $altid = $ibx->{altid} or return {};
+ my %h = map {;
+ my $x = PublicInbox::AltId->new($ibx, $_);
+ "$x->{prefix}" => $x->{filename}
+ } @$altid;
+ \%h;
+}
+
+sub sqlite3_missing ($) {
+ PublicInbox::WwwResponse::oneshot($_[0], 501, \<<EOF);
+<pre>sqlite3 not available
+
+The administrator needs to install the sqlite3(1) binary
+to support gzipped sqlite3 dumps.</pre>
+</pre>
+EOF
+}
+
+sub check_output {
+ my ($r, $bref, $ctx) = @_;
+ return PublicInbox::WwwResponse::oneshot($ctx, 500) if !defined($r);
+ if ($r == 0) {
+ my $err = eval { $ctx->{env}->{'psgi.errors'} } // \*STDERR;
+ $err->print("unexpected EOF from sqlite3\n");
+ return PublicInbox::WwwResponse::oneshot($ctx, 501);
+ }
+ [200, [ qw(Content-Type application/gzip), 'Content-Disposition',
+ "inline; filename=$ctx->{altid_pfx}.sql.gz" ] ]
+}
+
+# POST $INBOX/$prefix.sql.gz
+# we use the sqlite3(1) binary here since that's where the ".dump"
+# command is implemented, not (AFAIK) in the libsqlite3 library
+# and thus not usable from DBD::SQLite.
+sub sqldump ($$) {
+ my ($ctx, $altid_pfx) = @_;
+ my $ibx = $ctx->{-inbox};
+ my $altid_map = $ibx->{-altid_map} //= altid_map($ibx);
+ my $fn = $altid_map->{$altid_pfx};
+ unless (defined $fn) {
+ return PublicInbox::WwwStream::oneshot($ctx, 404, \<<EOF);
+<pre>`$altid_pfx' is not a valid altid for this inbox</pre>
+EOF
+ }
+
+ eval { require PublicInbox::GzipFilter } or
+ return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+<pre>gzip output not available
+
+The administrator needs to install the Compress::Raw::Zlib Perl module
+to support gzipped sqlite3 dumps.</pre>
+EOF
+ $sqlite3 //= which('sqlite3');
+ if (!defined($sqlite3)) {
+ return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+<pre>sqlite3 not available
+
+The administrator needs to install the sqlite3(1) binary
+to support gzipped sqlite3 dumps.</pre>
+</pre>
+EOF
+ }
+
+ # setup stdin, POSIX requires writes <= 512 bytes to succeed so
+ # we can close the pipe right away.
+ pipe(my ($r, $w)) or die "pipe: $!";
+ syswrite($w, ".dump\n") == 6 or die "write: $!";
+ close($w) or die "close: $!";
+
+ # TODO: use -readonly if available with newer sqlite3(1)
+ my $qsp = PublicInbox::Qspawn->new([$sqlite3, $fn], undef, { 0 => $r });
+ my $env = $ctx->{env};
+ $ctx->{altid_pfx} = $altid_pfx;
+ $env->{'qspawn.filter'} = PublicInbox::GzipFilter->new;
+ $qsp->psgi_return($env, undef, \&check_output, $ctx);
+}
+
+1;
diff --git a/t/www_altid.t b/t/www_altid.t
new file mode 100644
index 00000000..a885c389
--- /dev/null
+++ b/t/www_altid.t
@@ -0,0 +1,83 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Inbox;
+use PublicInbox::InboxWritable;
+use PublicInbox::Config;
+use PublicInbox::Spawn qw(which spawn);
+which('sqlite3') or plan skip_all => 'sqlite3 binary missing';
+require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape
+ Plack::Builder IO::Uncompress::Gunzip));
+use_ok($_) for qw(Plack::Test HTTP::Request::Common);
+require_ok 'PublicInbox::Msgmap';
+require_ok 'PublicInbox::AltId';
+require_ok 'PublicInbox::WWW';
+my ($inboxdir, $for_destroy) = tmpdir();
+my $aid = 'xyz';
+my $spec = "serial:$aid:file=blah.sqlite3";
+if ('setup') {
+ my $opts = {
+ inboxdir => $inboxdir,
+ name => 'test',
+ -primary_address => 'test@example.com',
+ };
+ my $ibx = PublicInbox::Inbox->new($opts);
+ $ibx = PublicInbox::InboxWritable->new($ibx, 1);
+ my $im = $ibx->importer(0);
+ my $mime = PublicInbox::MIME->new(<<'EOF');
+From: a@example.com
+Message-Id: <a@example.com>
+
+EOF
+ $im->add($mime);
+ $im->done;
+ mkdir "$inboxdir/public-inbox" or die;
+ my $altid = PublicInbox::AltId->new($ibx, $spec, 1);
+ $altid->mm_alt->mid_set(1, 'a@example.com');
+}
+
+my $cfgpath = "$inboxdir/cfg";
+open my $fh, '>', $cfgpath or die;
+print $fh <<EOF or die;
+[publicinbox "test"]
+ inboxdir = $inboxdir
+ address = test\@example.com
+ altid = $spec
+ url = http://example.com/test
+EOF
+close $fh or die;
+my $cfg = PublicInbox::Config->new($cfgpath);
+my $www = PublicInbox::WWW->new($cfg);
+my $cmpfile = "$inboxdir/cmp.sqlite3";
+my $client = sub {
+ my ($cb) = @_;
+ my $res = $cb->(POST("/test/$aid.sql.gz"));
+ is($res->code, 200, 'retrieved gzipped dump');
+ IO::Uncompress::Gunzip::gunzip(\($res->content) => \(my $buf));
+ pipe(my ($r, $w)) or die;
+ my $cmd = ['sqlite3', $cmpfile];
+ my $pid = spawn($cmd, undef, { 0 => $r });
+ print $w $buf or die;
+ close $w or die;
+ is(waitpid($pid, 0), $pid, 'sqlite3 exited');
+ is($?, 0, 'sqlite3 loaded dump');
+ my $mm_cmp = PublicInbox::Msgmap->new_file($cmpfile);
+ is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid');
+ $mm_cmp = undef;
+ unlink $cmpfile or die;
+};
+test_psgi(sub { $www->call(@_) }, $client);
+SKIP: {
+ require_mods(qw(Plack::Test::ExternalServer), 4);
+ my $env = { PI_CONFIG => $cfgpath };
+ my $sock = tcp_server() or die;
+ my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err);
+ my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
+ my $td = start_script($cmd, $env, { 3 => $sock });
+ my ($h, $p) = ($sock->sockhost, $sock->sockport);
+ local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
+ Plack::Test::ExternalServer::test_psgi(client => $client);
+}
+done_testing;
^ permalink raw reply related [relevance 4%]
* [PATCH 00/11] www: export SQLite altid dumps
@ 2020-03-21 2:03 7% Eric Wong
2020-03-21 2:03 4% ` [PATCH 11/11] www: add endpoint to retrieve " Eric Wong
0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2020-03-21 2:03 UTC (permalink / raw)
To: meta
To improve reproducibility in mirrors, altid dumps can be
exported via "POST /$INBOX_URL/$prefix.sql.gz". $prefix is
something like "gmane" (though the search prefix is "gmane:"
with a colon).
Eric Wong (11):
qspawn: reinstate filter support, add gzip filter
gzipfilter: lazy allocate the deflate context
wwwstream: introduce oneshot API to avoid ->getline
extmsg: use WwwResponse::oneshot
wwwstream: oneshot sets content-length
mbox: need_gzip uses WwwStream::oneshot
qspawn: handle ENOENT (and other errors on exec)
search: clobber -user_pfx on query parser initialization
wwwtext: show thread endpoints info w/ indexlevel=basic
altid: warn about non-word prefixes
www: add endpoint to retrieve altid dumps
MANIFEST | 4 ++
lib/PublicInbox/AltId.pm | 3 +-
lib/PublicInbox/ExtMsg.pm | 4 +-
lib/PublicInbox/GetlineBody.pm | 21 ++++----
lib/PublicInbox/GzipFilter.pm | 59 +++++++++++++++++++++
lib/PublicInbox/Mbox.pm | 16 +++---
lib/PublicInbox/Qspawn.pm | 66 ++++++++++++++----------
lib/PublicInbox/Search.pm | 4 +-
lib/PublicInbox/ViewVCS.pm | 8 +--
lib/PublicInbox/WWW.pm | 14 ++++-
lib/PublicInbox/WwwAltId.pm | 94 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/WwwStream.pm | 29 +++++++++--
lib/PublicInbox/WwwText.pm | 10 +++-
t/gzip_filter.t | 37 +++++++++++++
t/httpd-corner.psgi | 16 ++++++
t/httpd-corner.t | 48 +++++++++++++++++
t/www_altid.t | 83 ++++++++++++++++++++++++++++++
17 files changed, 452 insertions(+), 64 deletions(-)
create mode 100644 lib/PublicInbox/GzipFilter.pm
create mode 100644 lib/PublicInbox/WwwAltId.pm
create mode 100644 t/gzip_filter.t
create mode 100644 t/www_altid.t
^ permalink raw reply [relevance 7%]
Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-03-21 2:03 7% [PATCH 00/11] www: export SQLite altid dumps Eric Wong
2020-03-21 2:03 4% ` [PATCH 11/11] www: add endpoint to retrieve " Eric Wong
2020-03-30 5:18 4% [PATCH] wwwstream::oneshot => html_oneshot Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).