user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* Re: [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest
  2021-06-28 21:59  5% ` [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest Eric Wong
@ 2021-06-30  2:37  0%   ` Kyle Meyer
  0 siblings, 0 replies; 6+ results
From: Kyle Meyer @ 2021-06-30  2:37 UTC (permalink / raw)
  To: Eric Wong; +Cc: meta

Eric Wong writes:

> Fixes: 520be116e8a686cb ("www_listing: start updating for pagination + search")

Confirmed.  Thank you for the quick fix.

^ permalink raw reply	[relevance 0%]

* [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest
  2021-06-27 20:28  6% empty /manifest.js.gz response as of 520be116 Kyle Meyer
@ 2021-06-28 21:59  5% ` Eric Wong
  2021-06-30  2:37  0%   ` Kyle Meyer
  0 siblings, 1 reply; 6+ results
From: Eric Wong @ 2021-06-28 21:59 UTC (permalink / raw)
  To: meta; +Cc: Kyle Meyer

ManifestJsGz->response was not invoking the new "url_filter"
method properly.  Furthermore, fix url_filter for returning 404
responses.

Reported-by: Kyle Meyer <kyle@kyleam.com>
Link: https://public-inbox.org/meta/87fsx3128a.fsf@kyleam.com/
Fixes: 520be116e8a686cb ("www_listing: start updating for pagination + search")
---
 lib/PublicInbox/ManifestJsGz.pm |  4 +--
 lib/PublicInbox/WwwListing.pm   |  2 +-
 t/www_listing.t                 | 53 ++++++++++++++++++++-------------
 3 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 9dc10791..7fee78dd 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -13,11 +13,11 @@ use HTTP::Date qw(time2str);
 
 my $json = PublicInbox::Config::json();
 
-sub url_regexp {
+sub url_filter {
 	my ($ctx) = @_;
 	# grokmirror uses relative paths, so it's domain-dependent
 	# SUPER calls PublicInbox::WwwListing::url_filter
-	($ctx->url_filter('publicInbox.grokManifest', 'match=domain'))[0];
+	$ctx->SUPER::url_filter('publicInbox.grokManifest', 'match=domain');
 }
 
 sub inject_entry ($$$;$) {
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 76c76663..98a69986 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -55,7 +55,7 @@ again:
 	} elsif ($v eq 'all') {
 		(qr/./, undef);
 	} elsif ($v eq '404') {
-		undef;
+		(undef, undef);
 	} else {
 		warn <<EOF;
 `$v' is not a valid value for `$key'
diff --git a/t/www_listing.t b/t/www_listing.t
index 6a2892de..6b3b408f 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -1,14 +1,15 @@
+#!perl -w
 # Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # manifest.js.gz generation and grok-pull integration test
 use strict;
-use warnings;
+use v5.10.1;
 use Test::More;
 use PublicInbox::Spawn qw(which);
 use PublicInbox::TestCommon;
 use PublicInbox::Import;
-require_mods(qw(json URI::Escape Plack::Builder Digest::SHA
-		IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny));
+use IO::Uncompress::Gunzip qw(gunzip);
+require_mods(qw(json URI::Escape Plack::Builder Digest::SHA HTTP::Tiny));
 require PublicInbox::WwwListing;
 require PublicInbox::ManifestJsGz;
 use PublicInbox::Config;
@@ -32,21 +33,23 @@ like($bare->manifest_entry->{fingerprint}, qr/\A[a-f0-9]{40}\z/,
 	'got fingerprint with non-empty repo');
 
 sub tiny_test {
-	my ($json, $host, $port) = @_;
-	my $tmp;
+	my ($json, $host, $port, $html) = @_;
+	my ($tmp, $res);
 	my $http = HTTP::Tiny->new;
-	my $res = $http->get("http://$host:$port/");
-	is($res->{status}, 200, 'got HTML listing');
-	like($res->{content}, qr!</html>!si, 'listing looks like HTML');
-
-	$res = $http->get("http://$host:$port/", {'Accept-Encoding'=>'gzip'});
-	is($res->{status}, 200, 'got gzipped HTML listing');
-	IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
-	like($tmp, qr!</html>!si, 'unzipped listing looks like HTML');
-
+	if ($html) {
+		$res = $http->get("http://$host:$port/");
+		is($res->{status}, 200, 'got HTML listing');
+		like($res->{content}, qr!</html>!si, 'listing looks like HTML');
+
+		$res = $http->get("http://$host:$port/",
+				{'Accept-Encoding'=>'gzip'});
+		is($res->{status}, 200, 'got gzipped HTML listing');
+		gunzip(\(delete $res->{content}) => \$tmp);
+		like($tmp, qr!</html>!si, 'unzipped listing looks like HTML');
+	}
 	$res = $http->get("http://$host:$port/manifest.js.gz");
 	is($res->{status}, 200, 'got manifest');
-	IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
+	gunzip(\(delete $res->{content}) => \$tmp);
 	unlike($tmp, qr/"modified":\s*"/, 'modified is an integer');
 	my $manifest = $json->decode($tmp);
 	ok(my $clone = $manifest->{'/alt'}, '/alt in manifest');
@@ -95,10 +98,9 @@ SKIP: {
 		"lorelei \xc4\x80"), 0,
 		'set gitweb user');
 	ok(unlink("$bare->{git_dir}/description"), 'removed bare/description');
-	open $fh, '>', $cfgfile or die;
-	print $fh <<"" or die;
-[publicinbox]
-	wwwlisting = all
+	open $fh, '>', $cfgfile or xbail "open $cfgfile: $!";
+	$fh->autoflush(1);
+	print $fh <<"" or xbail "print $!";
 [publicinbox "bare"]
 	inboxdir = $bare->{git_dir}
 	url = http://$host/bare
@@ -112,13 +114,22 @@ SKIP: {
 	url = http://$host/v2
 	address = v2\@example.com
 
-	close $fh or die;
 	my $env = { PI_CONFIG => $cfgfile };
 	my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ];
 	$td = start_script($cmd, $env, { 3 => $sock });
-	$sock = undef;
 
+	# publicInbox.grokManifest defaults to match=domain
 	tiny_test($json, $host, $port);
+	undef $td;
+
+	print $fh <<"" or xbail "print $!";
+[publicinbox]
+	wwwlisting = all
+
+	close $fh or xbail "close $!";
+	$td = start_script($cmd, $env, { 3 => $sock });
+	tiny_test($json, $host, $port, 1);
+	undef $sock;
 
 	my $grok_pull = which('grok-pull') or
 		skip('grok-pull not available', 12);

^ permalink raw reply related	[relevance 5%]

* empty /manifest.js.gz response as of 520be116
@ 2021-06-27 20:28  6% Kyle Meyer
  2021-06-28 21:59  5% ` [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest Eric Wong
  0 siblings, 1 reply; 6+ results
From: Kyle Meyer @ 2021-06-27 20:28 UTC (permalink / raw)
  To: meta

I recently upgraded a server from 08b649735 to 5860b498a and noticed
that grok-pull didn't bring in any updates.  It looks like what's going
on is that the top-level /manifest.js.gz endpoint is now coming up
empty.

A minimal example on 5860b498a:

  $ git clone --mirror https://try.public-inbox.org/test test
  $ export PI_CONFIG="$PWD/config"
  $ public-inbox-init -V1 test test/ \
     http://localhost:8080/test test@public-inbox.org
  $ public-inbox-index -v test
  $ public-inbox-httpd &

  $ curl -fsS http://localhost:8080/manifest.js.gz | zcat | jq
  {}

On 08b649735, the output is

  {
    "/test": {
      "reference": null,
      ...
    }
  }

Bisecting with the script below points to 520be116 (www_listing: start
updating for pagination + search, 2021-06-23).

--8<---------------cut here---------------start------------->8---
#!/bin/sh
# Bisection helper: builds and installs the checked-out revision, sets up a
# throwaway v1 inbox, starts public-inbox-httpd, and exits successfully only
# if /manifest.js.gz contains an entry for "/test".

# Build and install this revision.  If any step fails, exit with 125, the
# status `git bisect run` treats as "cannot test this commit, skip it".
perl Makefile.PL && make && sudo make install || exit 125

# Local clone of https://try.public-inbox.org/test
pi_test=/tmp/pi-test

# Work inside a fresh temporary directory (under $TMPDIR, or /tmp if unset).
cd "$(mktemp -d "${TMPDIR:-/tmp}"/pi-XXXXXXX)"

# Point the public-inbox tools at a config file inside the temporary directory.
export PI_CONFIG="$PWD/config"

# Mirror the local clone, register it as a v1 inbox named "test" served at
# http://localhost:8080/test, then index it.
git clone --mirror "$pi_test" test
public-inbox-init -V1 test test/ \
                  http://localhost:8080/test test@public-inbox.org
public-inbox-index -v test

# On INT, TERM, or normal exit: clear the TERM trap, then kill this shell's
# child processes (the backgrounded httpd started on the next line).
trap 'trap - TERM && pkill -P $$' INT TERM EXIT
public-inbox-httpd &

# Presumably gives the daemon time to start listening before the request.
sleep 1

# Fetch and decompress the manifest.  `jq -e` derives its exit status from the
# result, so the script fails when the "/test" key is missing (null).
curl -fsS http://localhost:8080/manifest.js.gz | zcat | jq -e '.["/test"]'
--8<---------------cut here---------------end--------------->8---

^ permalink raw reply	[relevance 6%]

* [PATCH] www_listing: fix manifest.js.gz generation with extindex "all"
@ 2021-06-24 12:59  5% Eric Wong
  0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2021-06-24 12:59 UTC (permalink / raw)
  To: meta

WwwListing and ManifestJsGz may be too different nowadays to
be worth the code sharing between them.

Update some comments and note we still need better tests :x

Fixes: 520be116e8a686cb ("www_listing: start updating for pagination + search")
---
 lib/PublicInbox/ManifestJsGz.pm | 26 +++++++++++++++++++-------
 lib/PublicInbox/WwwListing.pm   |  3 ++-
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index e7bb0e86..9dc10791 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -13,12 +13,11 @@ use HTTP::Date qw(time2str);
 
 my $json = PublicInbox::Config::json();
 
-# called by WwwListing
 sub url_regexp {
 	my ($ctx) = @_;
 	# grokmirror uses relative paths, so it's domain-dependent
-	# SUPER calls PublicInbox::WwwListing::url_regexp
-	$ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
+	# SUPER calls PublicInbox::WwwListing::url_filter
+	($ctx->url_filter('publicInbox.grokManifest', 'match=domain'))[0];
 }
 
 sub inject_entry ($$$;$) {
@@ -29,7 +28,7 @@ sub inject_entry ($$$;$) {
 	$ctx->{manifest}->{$url_path} = $ent;
 }
 
-sub manifest_add ($$;$$) {
+sub manifest_add ($$;$$) { # slow path w/o extindex "all"
 	my ($ctx, $ibx, $epoch, $default_desc) = @_;
 	my $url_path = "/$ibx->{name}";
 	my $git;
@@ -70,20 +69,33 @@ sub eidx_manifest_add ($$$) {
 	}
 }
 
+sub response {
+	my ($class, $ctx) = @_;
+	bless $ctx, $class;
+	my ($re, undef) = $ctx->url_filter;
+	$re // return psgi_triple($ctx);
+	my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+					$ctx->can('list_match_i'), $re, $ctx);
+	sub {
+		$ctx->{-wcb} = $_[0]; # HTTP server callback
+		$ctx->{env}->{'pi-httpd.async'} ?
+				$iter->event_step : $iter->each_section;
+	}
+}
+
 sub ibx_entry {
 	my ($ctx, $ibx) = @_;
 	my $ALL = $ctx->{www}->{pi_cfg}->ALL;
-	if ($ALL) {
+	if ($ALL) { # FIXME: test this in t/
 		eidx_manifest_add($ctx, $ALL, $ibx);
 	} else {
 		slow_manifest_add($ctx, $ibx);
+		warn "E: $@" if $@;
 	}
-	warn "E: $@" if $@;
 }
 
 sub hide_key { 'manifest' } # for WwwListing->list_match_i
 
-# overrides WwwListing->psgi_triple
 sub psgi_triple {
 	my ($ctx) = @_;
 	my $abs2urlpath = delete($ctx->{-abs2urlpath}) // {};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index eb015742..76c76663 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -90,7 +90,7 @@ sub add_misc_ibx { # MiscSearch->retry_reopen callback
 		$qs = "( $qs ) AND ( $user_query )";
 	}
 	my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
-	$ctx->{-list} = [];
+	delete $ctx->{-list}; # reset if retried
 	my $pi_cfg = $ctx->{www}->{pi_cfg};
 	for my $mi ($mset->items) {
 		my $doc = $mi->get_document;
@@ -122,6 +122,7 @@ sub response {
 	my ($re, $qs) = $ctx->url_filter;
 	$re // return $ctx->psgi_triple;
 	if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+		# FIXME: test this in t/
 		$ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
 						$ctx, $re, $qs);
 	} else { # slow path, no [extindex "all"] configured

^ permalink raw reply related	[relevance 5%]

* [PATCH 2/3] www_listing: start updating for pagination + search
  2021-06-23 11:14  7% [PATCH 0/3] some WWW search things Eric Wong
@ 2021-06-23 11:14  4% ` Eric Wong
  0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
  To: meta

When dealing with thousands of inboxes, displaying all of
them on a single page isn't going to work.  So steal some
pagination and search results code from the message search
to generate some basic HTML output that looks good in w3m.
---
 lib/PublicInbox/Config.pm       |   5 ++
 lib/PublicInbox/ManifestJsGz.pm |   2 +-
 lib/PublicInbox/MiscSearch.pm   |  34 +++----
 lib/PublicInbox/SearchQuery.pm  |  13 +--
 lib/PublicInbox/SearchView.pm   |   2 +-
 lib/PublicInbox/WwwListing.pm   | 155 +++++++++++++++++++++++++++-----
 6 files changed, 164 insertions(+), 47 deletions(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 3f0f5a01..36f2fafb 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -97,6 +97,11 @@ sub lookup_ei {
 	$self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
 }
 
+sub lookup_eidx_key {
+	my ($self, $eidx_key) = @_;
+	_lookup_fill($self, '-by_eidx_key', $eidx_key);
+}
+
 # special case for [extindex "all"]
 sub ALL { lookup_ei($_[0], 'all') }
 
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 31cf15dc..e7bb0e86 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -81,7 +81,7 @@ sub ibx_entry {
 	warn "E: $@" if $@;
 }
 
-sub hide_key { 'manifest' }
+sub hide_key { 'manifest' } # for WwwListing->list_match_i
 
 # overrides WwwListing->psgi_triple
 sub psgi_triple {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 4e010453..6b575b0d 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback
 	$eq->set_query($qr);
         my $desc = !$opt->{asc};
 	my $rel = $opt->{relevance} // 0;
-	if ($rel == -1) { # ORDER BY docid/UID
+	if ($rel == -1) { # ORDER BY docid
 		$eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
 		$eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
 	} elsif ($rel) {
@@ -132,6 +132,23 @@ sub ibx_data_once {
 	}
 }
 
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+	my ($doc) = $_[-1];
+	my $d;
+	my $data = $json->decode($doc->get_data);
+	for (values %$data) {
+		$d = $_->{description} // next;
+		$d =~ s/ \[epoch [0-9]+\]\z// or next;
+		last;
+	}
+	{
+		uidvalidity => int_val($doc, $UIDVALIDITY),
+		-modified => int_val($doc, $MODIFIED),
+		# extract description from manifest.js.gz epoch description
+		description => $d
+	};
+}
+
 sub inbox_data {
 	my ($self, $ibx) = @_;
 	retry_reopen($self, \&ibx_data_once, $ibx);
@@ -141,20 +158,7 @@ sub ibx_cache_load {
 	my ($doc, $cache) = @_;
 	my ($eidx_key) = xap_terms('Q', $doc);
 	return unless defined($eidx_key); # expired
-	my $ce = $cache->{$eidx_key} = {};
-	$ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
-	$ce->{-modified} = int_val($doc, $MODIFIED);
-	$ce->{description} = do {
-		# extract description from manifest.js.gz epoch description
-		my $d;
-		my $data = $json->decode($doc->get_data);
-		for (values %$data) {
-			$d = $_->{description} // next;
-			$d =~ s/ \[epoch [0-9]+\]\z// or next;
-			last;
-		}
-		$d;
-	}
+	$cache->{$eidx_key} = doc2ibx_cache_ent($doc);
 }
 
 sub _nntpd_cache_load { # retry_reopen callback
diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm
index 0f360500..a6b7d843 100644
--- a/lib/PublicInbox/SearchQuery.pm
+++ b/lib/PublicInbox/SearchQuery.pm
@@ -1,7 +1,7 @@
 # Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# used by PublicInbox::SearchView
+# used by PublicInbox::SearchView and PublicInbox::WwwListing
 package PublicInbox::SearchQuery;
 use strict;
 use v5.10.1;
@@ -32,11 +32,12 @@ sub qs_html {
 	if (scalar(keys(%override))) {
 		$self = bless { (%$self, %override) }, ref($self);
 	}
-
-	my $q = uri_escape($self->{'q'}, MID_ESC);
-	$q =~ s/%20/+/g; # improve URL readability
-	my $qs = "q=$q";
-
+	my $qs = '';
+	if (defined(my $q = $self->{'q'})) {
+		$q = uri_escape($q, MID_ESC);
+		$q =~ s/%20/+/g; # improve URL readability
+		$qs .= "q=$q";
+	}
 	if (my $o = $self->{o}) { # ignore o == 0
 		$qs .= "&amp;o=$o";
 	}
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e13359d5..c0c801b3 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -219,7 +219,7 @@ sub search_nav_top {
 	$rv .= qq{</pre></form><pre>};
 }
 
-sub search_nav_bot {
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
 	my ($mset, $q) = @_;
 	my $total = $mset->get_matches_estimated;
 	my $l = $q->{l};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index f28eddf1..eb015742 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,7 +5,7 @@
 # Used by PublicInbox::WWW
 package PublicInbox::WwwListing;
 use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::ConfigIter;
@@ -13,18 +13,19 @@ use PublicInbox::WwwStream;
 use bytes (); # bytes::length
 
 sub ibx_entry {
-	my ($ctx, $ibx) = @_;
-	my $mtime = $ibx->modified;
-	my $ts = fmt_ts($mtime);
+	my ($ctx, $ibx, $ce) = @_;
+	$ce->{description} //= $ibx->description;
+	my $ts = fmt_ts($ce->{-modified} //= $ibx->modified);
 	my $url = prurl($ctx->{env}, $ibx->{url});
 	my $tmp = <<"";
 * $ts - $url
-  ${\$ibx->description}
+  $ce->{description}
 
 	if (defined(my $info_url = $ibx->{infourl})) {
 		$tmp .= '  ' . prurl($ctx->{env}, $info_url) . "\n";
 	}
-	push @{$ctx->{-list}}, [ $mtime, $tmp ];
+	push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+				$tmp : [ $ce->{-modified}, $tmp ] ));
 }
 
 sub list_match_i { # ConfigIter callback
@@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback
 	}
 }
 
-sub url_regexp {
+sub url_filter {
 	my ($ctx, $key, $default) = @_;
 	$key //= 'publicInbox.wwwListing';
 	$default //= '404';
@@ -50,9 +51,9 @@ again:
 	if ($v eq 'match=domain') {
 		my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
 		$h =~ s/:[0-9]+\z//;
-		qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+		(qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
 	} elsif ($v eq 'all') {
-		qr/./;
+		(qr/./, undef);
 	} elsif ($v eq '404') {
 		undef;
 	} else {
@@ -67,22 +68,122 @@ EOF
 
 sub hide_key { 'www' }
 
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+	my ($misc, $ctx, $re, $qs) = @_;
+	require PublicInbox::SearchQuery;
+	my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp});
+	my $o = $q->{o};
+	my ($asc, $min, $max);
+	if ($o < 0) {
+		$asc = 1;
+		$o = -($o + 1); # so [-1] is the last element, like Perl lists
+	}
+	my $r = $q->{r};
+	my $opt = {
+		offset => $o,
+		asc => $asc,
+		relevance => $r,
+		limit => $q->{l}
+	};
+	$qs .= ' type:inbox';
+	if (my $user_query = $q->{'q'}) {
+		$qs = "( $qs ) AND ( $user_query )";
+	}
+	my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+	$ctx->{-list} = [];
+	my $pi_cfg = $ctx->{www}->{pi_cfg};
+	for my $mi ($mset->items) {
+		my $doc = $mi->get_document;
+		my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+		$eidx_key // next;
+		my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next;
+		next if $ibx->{-hide}->{$ctx->hide_key};
+		grep(/$re/, @{$ibx->{url}}) or next;
+		$ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+		if ($r) { # for descriptions in search_nav_bot
+			my $pct = PublicInbox::Search::get_pct($mi);
+			# only when sorting by relevance, ->items is always
+			# ordered descending:
+			$max //= $pct;
+			$min = $pct;
+		}
+	}
+	if ($r) { # for descriptions in search_nav_bot
+		$q->{-min_pct} = $min;
+		$q->{-max_pct} = $max;
+	}
+	$ctx->{-mset} = $mset;
+	psgi_triple($ctx);
+}
+
 sub response {
 	my ($class, $ctx) = @_;
 	bless $ctx, $class;
-	if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
-		$ALL->misc->reopen;
-	}
-	my $re = $ctx->url_regexp or return $ctx->psgi_triple;
-	my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+	my ($re, $qs) = $ctx->url_filter;
+	$re // return $ctx->psgi_triple;
+	if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+		$ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+						$ctx, $re, $qs);
+	} else { # slow path, no [extindex "all"] configured
+		my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
 						\&list_match_i, $re, $ctx);
-	sub {
-		$ctx->{-wcb} = $_[0]; # HTTP server callback
-		$ctx->{env}->{'pi-httpd.async'} ?
-				$iter->event_step : $iter->each_section;
+		sub {
+			$ctx->{-wcb} = $_[0]; # HTTP server callback
+			$ctx->{env}->{'pi-httpd.async'} ?
+					$iter->event_step : $iter->each_section;
+		}
 	}
 }
 
+sub mset_footer ($$) {
+	my ($ctx, $mset) = @_;
+	# no footer if too few matches
+	return '' if $mset->get_matches_estimated == $mset->size;
+	require PublicInbox::SearchView;
+	PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq});
+}
+
+sub mset_nav_top {
+	my ($ctx, $mset) = @_;
+	my $q = $ctx->{-sq};
+	my $qh = $q->{'q'} // '';
+	utf8::decode($qh);
+	$qh = ascii_html($qh);
+	$qh = qq[\nvalue="$qh"] if $qh ne '';
+	my $rv = <<EOM;
+<form
+action="./"><pre><input
+name=q
+type=text$qh /><input
+type=submit
+value="locate inbox" /></pre></form><pre>
+EOM
+	chomp $rv;
+	if (defined($q->{'q'})) {
+		my $initial_q = $ctx->{-uxs_retried};
+		if (defined $initial_q) {
+			my $rewritten = $q->{'q'};
+			utf8::decode($initial_q);
+			utf8::decode($rewritten);
+			$initial_q = ascii_html($initial_q);
+			$rewritten = ascii_html($rewritten);
+			$rv .= " Warning: Initial query:\n <b>$initial_q</b>\n";
+			$rv .= " returned no results, used:\n";
+			$rv .= " <b>$rewritten</b>\n instead\n\n";
+		}
+		$rv .= 'Search results ordered by [';
+		if ($q->{r}) {
+			my $d = $q->qs_html(r => 0);
+			$rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>};
+		} else {
+			my $d = $q->qs_html(r => 1);
+			$rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>};
+		}
+		$rv .= ']';
+	}
+	$rv .= qq{</pre>};
+}
+
 sub psgi_triple {
 	my ($ctx) = @_;
 	my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
@@ -90,17 +191,23 @@ sub psgi_triple {
 	my $gzf = gzf_maybe($h, $ctx->{env});
 	$gzf->zmore('<html><head><title>' .
 				'public-inbox listing</title>' .
-				'</head><body><pre>');
+				'</head><body>');
 	my $code = 404;
-	if (my $list = $ctx->{-list}) {
+	if (my $list = delete $ctx->{-list}) {
+		my $mset = delete $ctx->{-mset};
 		$code = 200;
-		# sort by ->modified
-		@$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
+		if ($mset) { # already sorted, so search bar:
+			$gzf->zmore(mset_nav_top($ctx, $mset));
+		} else { # sort config dump by ->modified
+			@$list = map { $_->[1] }
+				sort { $b->[0] <=> $a->[0] } @$list;
+		}
 		$list = join("\n", @$list);
 		my $l = PublicInbox::Linkify->new;
-		$gzf->zmore($l->to_html($list));
+		$gzf->zmore('<pre>'.$l->to_html($list));
+		$gzf->zmore(mset_footer($ctx, $mset)) if $mset;
 	} else {
-		$gzf->zmore('no inboxes, yet');
+		$gzf->zmore('<pre>no inboxes, yet');
 	}
 	my $out = $gzf->zflush('</pre><hr><pre>'.
 			PublicInbox::WwwStream::code_footer($ctx->{env}) .

^ permalink raw reply related	[relevance 4%]

* [PATCH 0/3] some WWW search things
@ 2021-06-23 11:14  7% Eric Wong
  2021-06-23 11:14  4% ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
  0 siblings, 1 reply; 6+ results
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
  To: meta

Needed a break from banging my head trying to deal with
lei multi-directional sync semantics (while dealing with
the summertime heat and bug infestations :<)

www_listing ought to be faster when dealing with many inboxes,
more to come on that front.

Eric Wong (3):
  search: make xap_terms easier-to-use and use it more
  www_listing: start updating for pagination + search
  www: do not warn on blank query parameters

 lib/PublicInbox/Config.pm       |   5 ++
 lib/PublicInbox/LeiSearch.pm    |  16 ++--
 lib/PublicInbox/LeiXSearch.pm   |   4 +-
 lib/PublicInbox/ManifestJsGz.pm |   2 +-
 lib/PublicInbox/MiscSearch.pm   |  57 ++++++------
 lib/PublicInbox/Search.pm       |  22 ++---
 lib/PublicInbox/SearchIdx.pm    |   5 +-
 lib/PublicInbox/SearchQuery.pm  |  13 +--
 lib/PublicInbox/SearchView.pm   |   2 +-
 lib/PublicInbox/WWW.pm          |   5 +-
 lib/PublicInbox/WwwListing.pm   | 155 +++++++++++++++++++++++++++-----
 t/lei_store.t                   |   3 +-
 t/psgi_search.t                 |   4 +
 13 files changed, 200 insertions(+), 93 deletions(-)

^ permalink raw reply	[relevance 7%]

Results 1-6 of 6 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-06-23 11:14  7% [PATCH 0/3] some WWW search things Eric Wong
2021-06-23 11:14  4% ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
2021-06-24 12:59  5% [PATCH] www_listing: fix manifest.js.gz generation with extindex "all" Eric Wong
2021-06-27 20:28  6% empty /manifest.js.gz response as of 520be116 Kyle Meyer
2021-06-28 21:59  5% ` [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest Eric Wong
2021-06-30  2:37  0%   ` Kyle Meyer

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).