user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH] wwwstream::oneshot => html_oneshot
@ 2020-03-30  5:18  4% Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2020-03-30  5:18 UTC (permalink / raw)
  To: meta

And use Exporter to make our life easier, since WwwAltId was
using a non-existent PublicInbox::WwwResponse namespace in error
paths which doesn't get noticed by `perl -c' or exercised by
tests on normal systems.

Fixes: 6512b1245ebc6fe3 ("www: add endpoint to retrieve altid dumps")
---
 lib/PublicInbox/ExtMsg.pm    |  6 +++---
 lib/PublicInbox/Mbox.pm      |  3 ++-
 lib/PublicInbox/ViewVCS.pm   |  4 ++--
 lib/PublicInbox/WwwAltId.pm  | 16 ++++++++--------
 lib/PublicInbox/WwwStream.pm |  4 +++-
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index a14eddf6..1d17c2ce 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -9,7 +9,7 @@ package PublicInbox::ExtMsg;
 use strict;
 use warnings;
 use PublicInbox::Hval qw(ascii_html prurl mid_href);
-use PublicInbox::WwwStream;
+use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::Smsg;
 our $MIN_PARTIAL_LEN = 16;
 
@@ -159,7 +159,7 @@ sub ext_msg {
 	$ctx->{-html_tip} = $s .= '</pre>';
 	$ctx->{-title_html} = $title;
 	$ctx->{-upfx} = '../';
-	PublicInbox::WwwStream::oneshot($ctx, $code);
+	html_oneshot($ctx, $code);
 }
 
 sub ext_urls {
@@ -197,7 +197,7 @@ sub exact {
 					qq(<a\nhref="$u$href/">$u$html/</a>\n)
 				} @$found),
 			$ext_urls, '</pre>');
-	PublicInbox::WwwStream::oneshot($ctx, $code);
+	html_oneshot($ctx, $code);
 }
 
 1;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 46964bbb..3013dc91 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -13,6 +13,7 @@ use warnings;
 use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Smsg;
+use PublicInbox::WwwStream qw(html_oneshot);
 use Email::Simple;
 use Email::MIME::Encode;
 
@@ -236,7 +237,7 @@ sub mbox_all {
 }
 
 sub need_gzip {
-	PublicInbox::WwwStream::oneshot($_[0], 501, \<<EOF);
+	html_oneshot($_[0], 501, \<<EOF);
 <pre>gzipped mbox not available
 
 The administrator needs to install the Compress::Raw::Zlib Perl module
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 6714e67c..069937c1 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -18,7 +18,7 @@ use strict;
 use warnings;
 use bytes (); # only for bytes::length
 use PublicInbox::SolverGit;
-use PublicInbox::WwwStream;
+use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::Linkify;
 use PublicInbox::Tmpfile;
 use PublicInbox::Hval qw(ascii_html to_filename);
@@ -35,7 +35,7 @@ sub html_page ($$$) {
 	my ($ctx, $code, $strref) = @_;
 	my $wcb = delete $ctx->{-wcb};
 	$ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
-	my $res = PublicInbox::WwwStream::oneshot($ctx, $code, $strref);
+	my $res = html_oneshot($ctx, $code, $strref);
 	$wcb ? $wcb->($res) : $res;
 }
 
diff --git a/lib/PublicInbox/WwwAltId.pm b/lib/PublicInbox/WwwAltId.pm
index 263e884a..630ed686 100644
--- a/lib/PublicInbox/WwwAltId.pm
+++ b/lib/PublicInbox/WwwAltId.pm
@@ -5,13 +5,13 @@
 package PublicInbox::WwwAltId;
 use strict;
 use PublicInbox::Qspawn;
-use PublicInbox::WwwStream;
+use PublicInbox::WwwStream qw(html_oneshot);
 use PublicInbox::AltId;
 use PublicInbox::Spawn qw(which);
 our $sqlite3 = $ENV{SQLITE3};
 
 sub sqlite3_missing ($) {
-	PublicInbox::WwwResponse::oneshot($_[0], 501, \<<EOF);
+	html_oneshot($_[0], 501, \<<EOF);
 <pre>sqlite3 not available
 
 The administrator needs to install the sqlite3(1) binary
@@ -22,11 +22,11 @@ EOF
 
 sub check_output {
 	my ($r, $bref, $ctx) = @_;
-	return PublicInbox::WwwResponse::oneshot($ctx, 500) if !defined($r);
+	return html_oneshot($ctx, 500) if !defined($r);
 	if ($r == 0) {
 		my $err = eval { $ctx->{env}->{'psgi.errors'} } // \*STDERR;
 		$err->print("unexpected EOF from sqlite3\n");
-		return PublicInbox::WwwResponse::oneshot($ctx, 501);
+		return html_oneshot($ctx, 501);
 	}
 	[200, [ qw(Content-Type application/gzip), 'Content-Disposition',
 		"inline; filename=$ctx->{altid_pfx}.sql.gz" ] ]
@@ -43,14 +43,14 @@ sub sqldump ($$) {
 	my $altid_map = $ibx->altid_map;
 	my $fn = $altid_map->{$altid_pfx};
 	unless (defined $fn) {
-		return PublicInbox::WwwStream::oneshot($ctx, 404, \<<EOF);
+		return html_oneshot($ctx, 404, \<<EOF);
 <pre>`$altid_pfx' is not a valid altid for this inbox</pre>
 EOF
 	}
 
 	if ($env->{REQUEST_METHOD} ne 'POST') {
 		my $url = $ibx->base_url($ctx->{env}) . "$altid_pfx.sql.gz";
-		return PublicInbox::WwwStream::oneshot($ctx, 405, \<<EOF);
+		return html_oneshot($ctx, 405, \<<EOF);
 <pre>A POST request required to retrieve $altid_pfx.sql.gz
 
 	curl -XPOST -O $url
@@ -65,7 +65,7 @@ EOF
 	}
 
 	eval { require PublicInbox::GzipFilter } or
-		return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+		return html_oneshot($ctx, 501, \<<EOF);
 <pre>gzip output not available
 
 The administrator needs to install the Compress::Raw::Zlib Perl module
@@ -73,7 +73,7 @@ to support gzipped sqlite3 dumps.</pre>
 EOF
 	$sqlite3 //= which('sqlite3');
 	if (!defined($sqlite3)) {
-		return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+		return html_oneshot($ctx, 501, \<<EOF);
 <pre>sqlite3 not available
 
 The administrator needs to install the sqlite3(1) binary
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 985e0262..915a71ba 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -9,6 +9,8 @@
 package PublicInbox::WwwStream;
 use strict;
 use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(html_oneshot);
 use bytes (); # length
 use PublicInbox::Hval qw(ascii_html prurl);
 our $TOR_URL = 'https://www.torproject.org/';
@@ -170,7 +172,7 @@ sub getline {
 	delete $self->{cb} ? _html_end($self) : undef;
 }
 
-sub oneshot {
+sub html_oneshot ($$;$) {
 	my ($ctx, $code, $sref) = @_;
 	my $self = bless {
 		ctx => $ctx,

^ permalink raw reply related	[relevance 4%]

* [PATCH 11/11] www: add endpoint to retrieve altid dumps
  2020-03-21  2:03  7% [PATCH 00/11] www: export SQLite altid dumps Eric Wong
@ 2020-03-21  2:03  4% ` Eric Wong
  0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2020-03-21  2:03 UTC (permalink / raw)
  To: meta

This ensures all our indexed data, including data from altid
searches (e.g. "gmane:$ARTNUM"), is retrievable.

It uses a "POST" request to avoid wasting cycles when invoked by
crawlers, since a dump could potentially be several megabytes of
data not indexable by search engines.
---
 MANIFEST                    |  2 +
 lib/PublicInbox/AltId.pm    |  1 +
 lib/PublicInbox/WWW.pm      | 14 +++++-
 lib/PublicInbox/WwwAltId.pm | 94 +++++++++++++++++++++++++++++++++++++
 t/www_altid.t               | 83 ++++++++++++++++++++++++++++++++
 5 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 lib/PublicInbox/WwwAltId.pm
 create mode 100644 t/www_altid.t

diff --git a/MANIFEST b/MANIFEST
index be1c4ab5..84872561 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -168,6 +168,7 @@ lib/PublicInbox/ViewVCS.pm
 lib/PublicInbox/WWW.pm
 lib/PublicInbox/WWW.pod
 lib/PublicInbox/WatchMaildir.pm
+lib/PublicInbox/WwwAltId.pm
 lib/PublicInbox/WwwAtomStream.pm
 lib/PublicInbox/WwwAttach.pm
 lib/PublicInbox/WwwHighlight.pm
@@ -300,6 +301,7 @@ t/view.t
 t/watch_filter_rubylang.t
 t/watch_maildir.t
 t/watch_maildir_v2.t
+t/www_altid.t
 t/www_listing.t
 t/www_static.t
 t/x-unknown-alpine.eml
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 3be6c73c..6d16242a 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -39,6 +39,7 @@ sub new {
 	bless {
 		filename => $f,
 		writable => $writable,
+		prefix => $prefix,
 		xprefix => 'X'.uc($prefix),
 	}, $class;
 }
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2434f2f5..5017f572 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -65,6 +65,8 @@ sub call {
 			my ($epoch, $path) = ($2, $3);
 			return invalid_inbox($ctx, $1) ||
 				serve_git($ctx, $epoch, $path);
+		} elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
+			return get_altid_dump($ctx, $1, $2);
 		} elsif ($path_info =~ m!$INBOX_RE/!o) {
 			return invalid_inbox($ctx, $1) || mbox_results($ctx);
 		}
@@ -150,8 +152,8 @@ sub preload {
 		require PublicInbox::Search;
 		PublicInbox::Search::load_xapian();
 	};
-	foreach (qw(PublicInbox::SearchView PublicInbox::MboxGz)) {
-		eval "require $_;";
+	for (qw(SearchView MboxGz WwwAltId)) {
+		eval "require PublicInbox::$_;";
 	}
 	if (ref($self)) {
 		my $pi_config = $self->{pi_config};
@@ -301,6 +303,14 @@ sub get_vcs_object ($$$;$) {
 	PublicInbox::ViewVCS::show($ctx, $oid, $filename);
 }
 
+sub get_altid_dump {
+	my ($ctx, $inbox, $altid_pfx) =@_;
+	my $r404 = invalid_inbox($ctx, $inbox);
+	return $r404 if $r404;
+	eval { require PublicInbox::WwwAltId } or return need($ctx, 'sqlite3');
+	PublicInbox::WwwAltId::sqldump($ctx, $altid_pfx);
+}
+
 sub need {
 	my ($ctx, $extra) = @_;
 	my $msg = <<EOF;
diff --git a/lib/PublicInbox/WwwAltId.pm b/lib/PublicInbox/WwwAltId.pm
new file mode 100644
index 00000000..34641a92
--- /dev/null
+++ b/lib/PublicInbox/WwwAltId.pm
@@ -0,0 +1,94 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# dumps using the ".dump" command of sqlite3(1)
+package PublicInbox::WwwAltId;
+use strict;
+use PublicInbox::Qspawn;
+use PublicInbox::WwwStream;
+use PublicInbox::AltId;
+use PublicInbox::Spawn qw(which);
+our $sqlite3 = $ENV{SQLITE3};
+
+# returns prefix => pathname mapping
+# (pathname is NOT public, but prefix is used for Xapian queries)
+sub altid_map ($) {
+	my ($ibx) = @_;
+	my $altid = $ibx->{altid} or return {};
+	my %h = map {;
+		my $x = PublicInbox::AltId->new($ibx, $_);
+		"$x->{prefix}" => $x->{filename}
+	} @$altid;
+	\%h;
+}
+
+sub sqlite3_missing ($) {
+	PublicInbox::WwwResponse::oneshot($_[0], 501, \<<EOF);
+<pre>sqlite3 not available
+
+The administrator needs to install the sqlite3(1) binary
+to support gzipped sqlite3 dumps.</pre>
+</pre>
+EOF
+}
+
+sub check_output {
+	my ($r, $bref, $ctx) = @_;
+	return PublicInbox::WwwResponse::oneshot($ctx, 500) if !defined($r);
+	if ($r == 0) {
+		my $err = eval { $ctx->{env}->{'psgi.errors'} } // \*STDERR;
+		$err->print("unexpected EOF from sqlite3\n");
+		return PublicInbox::WwwResponse::oneshot($ctx, 501);
+	}
+	[200, [ qw(Content-Type application/gzip), 'Content-Disposition',
+		"inline; filename=$ctx->{altid_pfx}.sql.gz" ] ]
+}
+
+# POST $INBOX/$prefix.sql.gz
+# we use the sqlite3(1) binary here since that's where the ".dump"
+# command is implemented, not (AFAIK) in the libsqlite3 library
+# and thus not usable from DBD::SQLite.
+sub sqldump ($$) {
+	my ($ctx, $altid_pfx) = @_;
+	my $ibx = $ctx->{-inbox};
+	my $altid_map = $ibx->{-altid_map} //= altid_map($ibx);
+	my $fn = $altid_map->{$altid_pfx};
+	unless (defined $fn) {
+		return PublicInbox::WwwStream::oneshot($ctx, 404, \<<EOF);
+<pre>`$altid_pfx' is not a valid altid for this inbox</pre>
+EOF
+	}
+
+	eval { require PublicInbox::GzipFilter } or
+		return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+<pre>gzip output not available
+
+The administrator needs to install the Compress::Raw::Zlib Perl module
+to support gzipped sqlite3 dumps.</pre>
+EOF
+	$sqlite3 //= which('sqlite3');
+	if (!defined($sqlite3)) {
+		return PublicInbox::WwwStream::oneshot($ctx, 501, \<<EOF);
+<pre>sqlite3 not available
+
+The administrator needs to install the sqlite3(1) binary
+to support gzipped sqlite3 dumps.</pre>
+</pre>
+EOF
+	}
+
+	# setup stdin, POSIX requires writes <= 512 bytes to succeed so
+	# we can close the pipe right away.
+	pipe(my ($r, $w)) or die "pipe: $!";
+	syswrite($w, ".dump\n") == 6 or die "write: $!";
+	close($w) or die "close: $!";
+
+	# TODO: use -readonly if available with newer sqlite3(1)
+	my $qsp = PublicInbox::Qspawn->new([$sqlite3, $fn], undef, { 0 => $r });
+	my $env = $ctx->{env};
+	$ctx->{altid_pfx} = $altid_pfx;
+	$env->{'qspawn.filter'} = PublicInbox::GzipFilter->new;
+	$qsp->psgi_return($env, undef, \&check_output, $ctx);
+}
+
+1;
diff --git a/t/www_altid.t b/t/www_altid.t
new file mode 100644
index 00000000..a885c389
--- /dev/null
+++ b/t/www_altid.t
@@ -0,0 +1,83 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Inbox;
+use PublicInbox::InboxWritable;
+use PublicInbox::Config;
+use PublicInbox::Spawn qw(which spawn);
+which('sqlite3') or plan skip_all => 'sqlite3 binary missing';
+require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape
+	Plack::Builder IO::Uncompress::Gunzip));
+use_ok($_) for qw(Plack::Test HTTP::Request::Common);
+require_ok 'PublicInbox::Msgmap';
+require_ok 'PublicInbox::AltId';
+require_ok 'PublicInbox::WWW';
+my ($inboxdir, $for_destroy) = tmpdir();
+my $aid = 'xyz';
+my $spec = "serial:$aid:file=blah.sqlite3";
+if ('setup') {
+	my $opts = {
+		inboxdir => $inboxdir,
+		name => 'test',
+		-primary_address => 'test@example.com',
+	};
+	my $ibx = PublicInbox::Inbox->new($opts);
+	$ibx = PublicInbox::InboxWritable->new($ibx, 1);
+	my $im = $ibx->importer(0);
+	my $mime = PublicInbox::MIME->new(<<'EOF');
+From: a@example.com
+Message-Id: <a@example.com>
+
+EOF
+	$im->add($mime);
+	$im->done;
+	mkdir "$inboxdir/public-inbox" or die;
+	my $altid = PublicInbox::AltId->new($ibx, $spec, 1);
+	$altid->mm_alt->mid_set(1, 'a@example.com');
+}
+
+my $cfgpath = "$inboxdir/cfg";
+open my $fh, '>', $cfgpath or die;
+print $fh <<EOF or die;
+[publicinbox "test"]
+	inboxdir = $inboxdir
+	address = test\@example.com
+	altid = $spec
+	url = http://example.com/test
+EOF
+close $fh or die;
+my $cfg = PublicInbox::Config->new($cfgpath);
+my $www = PublicInbox::WWW->new($cfg);
+my $cmpfile = "$inboxdir/cmp.sqlite3";
+my $client = sub {
+	my ($cb) = @_;
+	my $res = $cb->(POST("/test/$aid.sql.gz"));
+	is($res->code, 200, 'retrieved gzipped dump');
+	IO::Uncompress::Gunzip::gunzip(\($res->content) => \(my $buf));
+	pipe(my ($r, $w)) or die;
+	my $cmd = ['sqlite3', $cmpfile];
+	my $pid = spawn($cmd, undef, { 0 => $r });
+	print $w $buf or die;
+	close $w or die;
+	is(waitpid($pid, 0), $pid, 'sqlite3 exited');
+	is($?, 0, 'sqlite3 loaded dump');
+	my $mm_cmp = PublicInbox::Msgmap->new_file($cmpfile);
+	is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid');
+	$mm_cmp = undef;
+	unlink $cmpfile or die;
+};
+test_psgi(sub { $www->call(@_) }, $client);
+SKIP: {
+	require_mods(qw(Plack::Test::ExternalServer), 4);
+	my $env = { PI_CONFIG => $cfgpath };
+	my $sock = tcp_server() or die;
+	my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err);
+	my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
+	my $td = start_script($cmd, $env, { 3 => $sock });
+	my ($h, $p) = ($sock->sockhost, $sock->sockport);
+	local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
+	Plack::Test::ExternalServer::test_psgi(client => $client);
+}
+done_testing;

^ permalink raw reply related	[relevance 4%]

* [PATCH 00/11] www: export SQLite altid dumps
@ 2020-03-21  2:03  7% Eric Wong
  2020-03-21  2:03  4% ` [PATCH 11/11] www: add endpoint to retrieve " Eric Wong
  0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2020-03-21  2:03 UTC (permalink / raw)
  To: meta

To improve reproducibility in mirrors, altid dumps can be
exported via "POST /$INBOX_URL/$prefix.sql.gz".  $prefix is
something like "gmane" (though the search prefix is "gmane:"
with a colon).

Eric Wong (11):
  qspawn: reinstate filter support, add gzip filter
  gzipfilter: lazy allocate the deflate context
  wwwstream: introduce oneshot API to avoid ->getline
  extmsg: use WwwResponse::oneshot
  wwwstream: oneshot sets content-length
  mbox: need_gzip uses WwwStream::oneshot
  qspawn: handle ENOENT (and other errors on exec)
  search: clobber -user_pfx on query parser initialization
  wwwtext: show thread endpoints info w/ indexlevel=basic
  altid: warn about non-word prefixes
  www: add endpoint to retrieve altid dumps

 MANIFEST                       |  4 ++
 lib/PublicInbox/AltId.pm       |  3 +-
 lib/PublicInbox/ExtMsg.pm      |  4 +-
 lib/PublicInbox/GetlineBody.pm | 21 ++++----
 lib/PublicInbox/GzipFilter.pm  | 59 +++++++++++++++++++++
 lib/PublicInbox/Mbox.pm        | 16 +++---
 lib/PublicInbox/Qspawn.pm      | 66 ++++++++++++++----------
 lib/PublicInbox/Search.pm      |  4 +-
 lib/PublicInbox/ViewVCS.pm     |  8 +--
 lib/PublicInbox/WWW.pm         | 14 ++++-
 lib/PublicInbox/WwwAltId.pm    | 94 ++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WwwStream.pm   | 29 +++++++++--
 lib/PublicInbox/WwwText.pm     | 10 +++-
 t/gzip_filter.t                | 37 +++++++++++++
 t/httpd-corner.psgi            | 16 ++++++
 t/httpd-corner.t               | 48 +++++++++++++++++
 t/www_altid.t                  | 83 ++++++++++++++++++++++++++++++
 17 files changed, 452 insertions(+), 64 deletions(-)
 create mode 100644 lib/PublicInbox/GzipFilter.pm
 create mode 100644 lib/PublicInbox/WwwAltId.pm
 create mode 100644 t/gzip_filter.t
 create mode 100644 t/www_altid.t

^ permalink raw reply	[relevance 7%]

Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-03-21  2:03  7% [PATCH 00/11] www: export SQLite altid dumps Eric Wong
2020-03-21  2:03  4% ` [PATCH 11/11] www: add endpoint to retrieve " Eric Wong
2020-03-30  5:18  4% [PATCH] wwwstream::oneshot => html_oneshot Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).