user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/5] extindex: add some validation and config knobs for WWW
Date: Tue, 16 Mar 2021 23:02:15 -0800	[thread overview]
Message-ID: <20210317070218.7971-3-e@80x24.org> (raw)
In-Reply-To: <20210317070218.7971-1-e@80x24.org>

We'll try to share a bit more configuration with
extindex entries for WWW PSGI usage.
---
 MANIFEST                         |  1 +
 lib/PublicInbox/Config.pm        | 76 +++++++++++++++++---------------
 lib/PublicInbox/ExtSearch.pm     |  2 +-
 lib/PublicInbox/TestCommon.pm    | 28 +++++++++++-
 lib/PublicInbox/WwwAtomStream.pm |  6 ++-
 t/config.t                       |  4 +-
 t/extindex-psgi.t                | 48 ++++++++++++++++++++
 t/psgi_v2.t                      | 30 +++----------
 8 files changed, 130 insertions(+), 65 deletions(-)
 create mode 100644 t/extindex-psgi.t

diff --git a/MANIFEST b/MANIFEST
index 49c10d62..775de5cd 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -324,6 +324,7 @@ t/eml.t
 t/eml_content_disposition.t
 t/eml_content_type.t
 t/epoll.t
+t/extindex-psgi.t
 t/extsearch.t
 t/fail-bin/spamc
 t/fake_inotify.t
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 87a03fd3..113975dd 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -88,12 +88,12 @@ sub lookup_list_id {
 
 sub lookup_name ($$) {
 	my ($self, $name) = @_;
-	$self->{-by_name}->{$name} // _fill($self, "publicinbox.$name");
+	$self->{-by_name}->{$name} // _fill_ibx($self, $name);
 }
 
 sub lookup_ei {
 	my ($self, $name) = @_;
-	$self->{-ei_by_name}->{$name} //= _fill_ei($self, "extindex.$name");
+	$self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
 }
 
 # special case for [extindex "all"]
@@ -167,8 +167,8 @@ sub git_config_dump {
 	$rv;
 }
 
-sub valid_inbox_name ($) {
-	my ($name) = @_;
+sub valid_foo_name ($;$) {
+	my ($name, $pfx) = @_;
 
 	# Similar rules found in git.git/remote.c::valid_remote_nick
 	# and git.git/refs.c::check_refname_component
@@ -176,6 +176,7 @@ sub valid_inbox_name ($) {
 	if ($name eq '' || $name =~ /\@\{/ ||
 	    $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! ||
 	    $name =~ /\A\./ || $name =~ /\.\z/) {
+		warn "invalid $pfx name: `$name'\n" if $pfx;
 		return 0;
 	}
 
@@ -375,26 +376,26 @@ sub rel2abs_collapsed {
 	Cwd::abs_path($p);
 }
 
-sub _fill {
-	my ($self, $pfx) = @_;
-	my $ibx = {};
+sub _one_val {
+	my ($self, $pfx, $k) = @_;
+	my $v = $self->{"$pfx.$k"} // return;
+	return $v if !ref($v);
+	warn "W: $pfx.$k has multiple values, only using `$v->[-1]'\n";
+	$v->[-1];
+}
 
+sub _fill_ibx {
+	my ($self, $name) = @_;
+	my $pfx = "publicinbox.$name";
+	my $ibx = {};
 	for my $k (qw(watch nntpserver)) {
 		my $v = $self->{"$pfx.$k"};
 		$ibx->{$k} = $v if defined $v;
 	}
 	for my $k (qw(filter inboxdir newsgroup replyto httpbackendmax feedmax
 			indexlevel indexsequentialshard)) {
-		if (defined(my $v = $self->{"$pfx.$k"})) {
-			if (ref($v) eq 'ARRAY') {
-				warn <<EOF;
-W: $pfx.$k has multiple values, only using `$v->[-1]'
-EOF
-				$ibx->{$k} = $v->[-1];
-			} else {
-				$ibx->{$k} = $v;
-			}
-		}
+		my $v = _one_val($self, $pfx, $k) // next;
+		$ibx->{$k} = $v;
 	}
 
 	# "mainrepo" is backwards compatibility:
@@ -403,9 +404,8 @@ EOF
 		warn "E: `$dir' must not contain `\\n'\n";
 		return;
 	}
-	foreach my $k (qw(obfuscate)) {
-		my $v = $self->{"$pfx.$k"};
-		defined $v or next;
+	for my $k (qw(obfuscate)) {
+		my $v = $self->{"$pfx.$k"} // next;
 		if (defined(my $bval = git_bool($v))) {
 			$ibx->{$k} = $bval;
 		} else {
@@ -414,19 +414,13 @@ EOF
 	}
 	# TODO: more arrays, we should support multi-value for
 	# more things to encourage decentralization
-	foreach my $k (qw(address altid nntpmirror coderepo hide listid url
+	for my $k (qw(address altid nntpmirror coderepo hide listid url
 			infourl watchheader)) {
-		if (defined(my $v = $self->{"$pfx.$k"})) {
-			$ibx->{$k} = _array($v);
-		}
-	}
-
-	my $name = substr($pfx, length('publicinbox.'));
-	if (!valid_inbox_name($name)) {
-		warn "invalid inbox name: '$name'\n";
-		return;
+		my $v = $self->{"$pfx.$k"} // next;
+		$ibx->{$k} = _array($v);
 	}
 
+	return unless valid_foo_name($name, 'publicinbox');
 	$ibx->{name} = $name;
 	$ibx->{-pi_cfg} = $self;
 	$ibx = PublicInbox::Inbox->new($ibx);
@@ -473,14 +467,13 @@ EOF
 		$ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
 		fill_all($self); # noop to populate -no_obfuscate
 	}
-
 	if (my $ibx_code_repos = $ibx->{coderepo}) {
 		my $code_repos = $self->{-code_repos};
 		my $repo_objs = $ibx->{-repo_objs} = [];
 		foreach my $nick (@$ibx_code_repos) {
 			my @parts = split(m!/!, $nick);
 			my $valid = 0;
-			$valid += valid_inbox_name($_) foreach (@parts);
+			$valid += valid_foo_name($_) foreach (@parts);
 			$valid == scalar(@parts) or next;
 
 			my $repo = $code_repos->{$nick} //=
@@ -496,10 +489,23 @@ EOF
 }
 
 sub _fill_ei ($$) {
-	my ($self, $pfx) = @_;
+	my ($self, $name) = @_;
 	eval { require PublicInbox::ExtSearch } or return;
-	my $d = $self->{"$pfx.topdir"};
-	defined($d) && -d $d ? PublicInbox::ExtSearch->new($d) : undef;
+	my $pfx = "extindex.$name";
+	my $d = $self->{"$pfx.topdir"} // return;
+	-d $d or return;
+	my $es = PublicInbox::ExtSearch->new($d);
+	for my $k (qw(indexlevel indexsequentialshard)) {
+		my $v = _one_val($self, $pfx, $k) // next;
+		$es->{$k} = $v;
+	}
+	for my $k (qw(altid coderepo hide url infourl)) {
+		my $v = $self->{"$pfx.$k"} // next;
+		$es->{$k} = _array($v);
+	}
+	return unless valid_foo_name($name, 'extindex');
+	$es->{name} = $name;
+	$es;
 }
 
 sub urlmatch {
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index 8ba4d396..c2cfc338 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -107,10 +107,10 @@ sub description {
 
 sub cloneurl { [] } # TODO
 
-sub base_url { 'https://example.com/TODO/' }
 sub nntp_url { [] }
 
 no warnings 'once';
+*base_url = \&PublicInbox::Inbox::base_url;
 *smsg_eml = \&PublicInbox::Inbox::smsg_eml;
 *smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid;
 *msg_by_mid = \&PublicInbox::Inbox::msg_by_mid;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index c2d07e59..0d15514e 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -15,7 +15,8 @@ BEGIN {
 	@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
 		run_script start_script key2sub xsys xsys_e xqx eml_load tick
 		have_xapian_compact json_utf8 setup_public_inboxes create_inbox
-		tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt);
+		tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
+		test_httpd);
 	require Test::More;
 	my @methods = grep(!/\W/, @Test::More::EXPORT);
 	eval(join('', map { "*$_=\\&Test::More::$_;" } @methods));
@@ -636,6 +637,31 @@ sub create_inbox ($$;@) {
 	$ibx;
 }
 
+sub test_httpd ($$;$) {
+	my ($env, $client, $skip) = @_;
+	for (qw(PI_CONFIG TMPDIR)) {
+		$env->{$_} or BAIL_OUT "$_ unset";
+	}
+	SKIP: {
+		require_mods(qw(Plack::Test::ExternalServer), $skip // 1);
+		my $sock = tcp_server() or die;
+		my ($out, $err) = map { "$env->{TMPDIR}/std$_.log" } qw(out err);
+		my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
+		my $td = start_script($cmd, $env, { 3 => $sock });
+		my ($h, $p) = tcp_host_port($sock);
+		local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
+		Plack::Test::ExternalServer::test_psgi(client => $client);
+		$td->join('TERM');
+		open my $fh, '<', $err or BAIL_OUT $!;
+		my $e = do { local $/; <$fh> };
+		if ($e =~ s/^Plack::Middleware::ReverseProxy missing,\n//gms) {
+			$e =~ s/^URL generation for redirects .*\n//gms;
+		}
+		is($e, '', 'no errors');
+	}
+};
+
+
 package PublicInboxTestProcess;
 use strict;
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 361e61f6..f60251b7 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -100,7 +100,11 @@ sub atom_header {
 	} else {
 		$title = title_tag($ibx->description);
 		$self_url .= 'new.atom';
-		$page_id = "mailto:$ibx->{-primary_address}";
+		if (defined(my $addr = $ibx->{-primary_address})) {
+			$page_id = "mailto:$addr";
+		} else {
+			$page_id = to_uuid($self_url);
+		}
 	}
 	qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
 	qq(<feed\nxmlns="http://www.w3.org/2005/Atom"\n) .
diff --git a/t/config.t b/t/config.t
index fe684106..06a105c1 100644
--- a/t/config.t
+++ b/t/config.t
@@ -159,7 +159,7 @@ my $xre = join('|', keys %X);
 for my $s (@invalid) {
 	my $d = $s;
 	$d =~ s/($xre)/$X{$1}/g;
-	ok(!PublicInbox::Config::valid_inbox_name($s), "`$d' name rejected");
+	ok(!PublicInbox::Config::valid_foo_name($s), "`$d' name rejected");
 }
 
 # obviously-valid examples
@@ -175,7 +175,7 @@ my @valid = qw(a a@example a@example.com);
 # '!', '$', '=', '+'
 push @valid, qw[bang! ca$h less< more> 1% (parens) &more eql= +plus], '#hash';
 for my $s (@valid) {
-	ok(PublicInbox::Config::valid_inbox_name($s), "`$s' name accepted");
+	ok(PublicInbox::Config::valid_foo_name($s), "`$s' name accepted");
 }
 
 {
diff --git a/t/extindex-psgi.t b/t/extindex-psgi.t
new file mode 100644
index 00000000..6f62b5a0
--- /dev/null
+++ b/t/extindex-psgi.t
@@ -0,0 +1,48 @@
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use PublicInbox::TestCommon;
+use PublicInbox::Config;
+use File::Copy qw(cp);
+use IO::Handle ();
+require_git(2.6);
+require_mods(qw(json DBD::SQLite Search::Xapian
+		HTTP::Request::Common Plack::Test URI::Escape Plack::Builder));
+use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
+require PublicInbox::WWW;
+my ($ro_home, $cfg_path) = setup_public_inboxes;
+my ($tmpdir, $for_destroy) = tmpdir;
+my $home = "$tmpdir/home";
+mkdir $home or BAIL_OUT $!;
+mkdir "$home/.public-inbox" or BAIL_OUT $!;
+my $pi_config = "$home/.public-inbox/config";
+cp("$ro_home/.public-inbox/config", $pi_config) or BAIL_OUT;
+my $env = { HOME => $home };
+run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT;
+{
+	open my $cfgfh, '>', $pi_config or BAIL_OUT;
+	$cfgfh->autoflush(1);
+	print $cfgfh <<EOM or BAIL_OUT;
+[extindex "all"]
+	topdir = $tmpdir/eidx
+	url = http://bogus.example.com/all
+EOM
+}
+my $www = PublicInbox::WWW->new(PublicInbox::Config->new($pi_config));
+my $client = sub {
+	my ($cb) = @_;
+	my $res = $cb->(GET('/all/'));
+	is($res->code, 200, '/all/ good');
+	$res = $cb->(GET('/all/new.atom', Host => 'usethis.example.com'));
+	like($res->content, qr!http://usethis\.example\.com/!s,
+		'Host: header respected in Atom feed');
+	unlike($res->content, qr!http://bogus\.example\.com/!s,
+		'default URL ignored with different host header');
+};
+test_psgi(sub { $www->call(@_) }, $client);
+%$env = (%$env, TMPDIR => $tmpdir, PI_CONFIG => $pi_config);
+test_httpd($env, $client);
+
+done_testing;
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 487317b6..1f190708 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -56,27 +56,6 @@ EOF
 	close $fh or BAIL_OUT;
 }
 
-my $run_httpd = sub {
-	my ($client, $skip) = @_;
-	SKIP: {
-		require_mods(qw(Plack::Test::ExternalServer), $skip);
-		my $env = { PI_CONFIG => $cfgpath };
-		my $sock = tcp_server() or die;
-		my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err);
-		my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
-		my $td = start_script($cmd, $env, { 3 => $sock });
-		my ($h, $p) = tcp_host_port($sock);
-		local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
-		Plack::Test::ExternalServer::test_psgi(client => $client);
-		$td->join('TERM');
-		open my $fh, '<', $err or BAIL_OUT $!;
-		my $e = do { local $/; <$fh> };
-		if ($e =~ s/^Plack::Middleware::ReverseProxy missing,\n//gms) {
-			$e =~ s/^URL generation for redirects .*\n//gms;
-		}
-		is($e, '', 'no errors');
-	}
-};
 my $msg = $ibx->msg_by_mid('a-mid@b');
 like($$msg, qr/\AFrom oldbug/s,
 	'"From_" line stored to test old bug workaround');
@@ -115,7 +94,8 @@ my $client0 = sub {
 		'new.html ordering is chronological');
 };
 test_psgi(sub { $www->call(@_) }, $client0);
-$run_httpd->($client0, 9);
+my $env = { TMPDIR => $tmpdir, PI_CONFIG => $cfgpath };
+test_httpd($env, $client0, 9);
 
 $eml->header_set('Message-ID', 'a-mid@b');
 $eml->body_set("hello ghosts\n");
@@ -225,7 +205,7 @@ my $client1 = sub {
 };
 
 test_psgi(sub { $www->call(@_) }, $client1);
-$run_httpd->($client1, 38);
+test_httpd($env, $client1, 38);
 
 {
 	my $exp = [ qw(<a-mid@b> <reuse@mid>) ];
@@ -267,7 +247,7 @@ my $client2 = sub {
 };
 
 test_psgi(sub { $www->call(@_) }, $client2);
-$run_httpd->($client2, 8);
+test_httpd($env, $client2, 8);
 {
 	# ensure conflicted attachments can be resolved
 	local $SIG{__WARN__} = sub {};
@@ -298,6 +278,6 @@ my $client3 = sub {
 	is_deeply(\@warn, [], 'no warnings on YYYYMMDD only');
 };
 test_psgi(sub { $www->call(@_) }, $client3);
-$run_httpd->($client3, 4);
+test_httpd($env, $client3, 4);
 
 done_testing;

  parent reply	other threads:[~2021-03-17  7:02 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-17  7:02 [PATCH 0/5] www: extindex stuff Eric Wong
2021-03-17  7:02 ` [PATCH 1/5] www_stream: add trailing slash for help and color links Eric Wong
2021-03-17  7:02 ` Eric Wong [this message]
2021-03-17  7:02 ` [PATCH 3/5] config: lazy-load coderepos, support extindex Eric Wong
2021-03-17  7:02 ` [PATCH 4/5] www: improve visibility of coderepos Eric Wong
2021-03-17  7:02 ` [PATCH 5/5] tests: show lsof output on deleted-file-check failures Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210317070218.7971-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).