* Re: [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest
2021-06-28 21:59 5% ` [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest Eric Wong
@ 2021-06-30 2:37 0% ` Kyle Meyer
0 siblings, 0 replies; 6+ results
From: Kyle Meyer @ 2021-06-30 2:37 UTC (permalink / raw)
To: Eric Wong; +Cc: meta
Eric Wong writes:
> Fixes: 520be116e8a686cb ("www_listing: start updating for pagination + search")
Confirmed. Thank you for the quick fix.
^ permalink raw reply [relevance 0%]
* [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest
2021-06-27 20:28 6% empty /manifest.js.gz response as of 520be116 Kyle Meyer
@ 2021-06-28 21:59 5% ` Eric Wong
2021-06-30 2:37 0% ` Kyle Meyer
0 siblings, 1 reply; 6+ results
From: Eric Wong @ 2021-06-28 21:59 UTC (permalink / raw)
To: meta; +Cc: Kyle Meyer
ManifestJsGz->response was not invoking the new "url_filter"
method properly. Furthermore, fix url_filter for returning 404
responses.
Reported-by: Kyle Meyer <kyle@kyleam.com>
Link: https://public-inbox.org/meta/87fsx3128a.fsf@kyleam.com/
Fixes: 520be116e8a686cb ("www_listing: start updating for pagination + search")
---
lib/PublicInbox/ManifestJsGz.pm | 4 +--
lib/PublicInbox/WwwListing.pm | 2 +-
t/www_listing.t | 53 ++++++++++++++++++++-------------
3 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 9dc10791..7fee78dd 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -13,11 +13,11 @@ use HTTP::Date qw(time2str);
my $json = PublicInbox::Config::json();
-sub url_regexp {
+sub url_filter {
my ($ctx) = @_;
# grokmirror uses relative paths, so it's domain-dependent
# SUPER calls PublicInbox::WwwListing::url_filter
- ($ctx->url_filter('publicInbox.grokManifest', 'match=domain'))[0];
+ $ctx->SUPER::url_filter('publicInbox.grokManifest', 'match=domain');
}
sub inject_entry ($$$;$) {
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index 76c76663..98a69986 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -55,7 +55,7 @@ again:
} elsif ($v eq 'all') {
(qr/./, undef);
} elsif ($v eq '404') {
- undef;
+ (undef, undef);
} else {
warn <<EOF;
`$v' is not a valid value for `$key'
diff --git a/t/www_listing.t b/t/www_listing.t
index 6a2892de..6b3b408f 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -1,14 +1,15 @@
+#!perl -w
# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# manifest.js.gz generation and grok-pull integration test
use strict;
-use warnings;
+use v5.10.1;
use Test::More;
use PublicInbox::Spawn qw(which);
use PublicInbox::TestCommon;
use PublicInbox::Import;
-require_mods(qw(json URI::Escape Plack::Builder Digest::SHA
- IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny));
+use IO::Uncompress::Gunzip qw(gunzip);
+require_mods(qw(json URI::Escape Plack::Builder Digest::SHA HTTP::Tiny));
require PublicInbox::WwwListing;
require PublicInbox::ManifestJsGz;
use PublicInbox::Config;
@@ -32,21 +33,23 @@ like($bare->manifest_entry->{fingerprint}, qr/\A[a-f0-9]{40}\z/,
'got fingerprint with non-empty repo');
sub tiny_test {
- my ($json, $host, $port) = @_;
- my $tmp;
+ my ($json, $host, $port, $html) = @_;
+ my ($tmp, $res);
my $http = HTTP::Tiny->new;
- my $res = $http->get("http://$host:$port/");
- is($res->{status}, 200, 'got HTML listing');
- like($res->{content}, qr!</html>!si, 'listing looks like HTML');
-
- $res = $http->get("http://$host:$port/", {'Accept-Encoding'=>'gzip'});
- is($res->{status}, 200, 'got gzipped HTML listing');
- IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
- like($tmp, qr!</html>!si, 'unzipped listing looks like HTML');
-
+ if ($html) {
+ $res = $http->get("http://$host:$port/");
+ is($res->{status}, 200, 'got HTML listing');
+ like($res->{content}, qr!</html>!si, 'listing looks like HTML');
+
+ $res = $http->get("http://$host:$port/",
+ {'Accept-Encoding'=>'gzip'});
+ is($res->{status}, 200, 'got gzipped HTML listing');
+ gunzip(\(delete $res->{content}) => \$tmp);
+ like($tmp, qr!</html>!si, 'unzipped listing looks like HTML');
+ }
$res = $http->get("http://$host:$port/manifest.js.gz");
is($res->{status}, 200, 'got manifest');
- IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
+ gunzip(\(delete $res->{content}) => \$tmp);
unlike($tmp, qr/"modified":\s*"/, 'modified is an integer');
my $manifest = $json->decode($tmp);
ok(my $clone = $manifest->{'/alt'}, '/alt in manifest');
@@ -95,10 +98,9 @@ SKIP: {
"lorelei \xc4\x80"), 0,
'set gitweb user');
ok(unlink("$bare->{git_dir}/description"), 'removed bare/description');
- open $fh, '>', $cfgfile or die;
- print $fh <<"" or die;
-[publicinbox]
- wwwlisting = all
+ open $fh, '>', $cfgfile or xbail "open $cfgfile: $!";
+ $fh->autoflush(1);
+ print $fh <<"" or xbail "print $!";
[publicinbox "bare"]
inboxdir = $bare->{git_dir}
url = http://$host/bare
@@ -112,13 +114,22 @@ SKIP: {
url = http://$host/v2
address = v2\@example.com
- close $fh or die;
my $env = { PI_CONFIG => $cfgfile };
my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ];
$td = start_script($cmd, $env, { 3 => $sock });
- $sock = undef;
+ # default publicinboxGrokManifest match=domain default
tiny_test($json, $host, $port);
+ undef $td;
+
+ print $fh <<"" or xbail "print $!";
+[publicinbox]
+ wwwlisting = all
+
+ close $fh or xbail "close $!";
+ $td = start_script($cmd, $env, { 3 => $sock });
+ tiny_test($json, $host, $port, 1);
+ undef $sock;
my $grok_pull = which('grok-pull') or
skip('grok-pull not available', 12);
^ permalink raw reply related [relevance 5%]
* empty /manifest.js.gz response as of 520be116
@ 2021-06-27 20:28 6% Kyle Meyer
2021-06-28 21:59 5% ` [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest Eric Wong
0 siblings, 1 reply; 6+ results
From: Kyle Meyer @ 2021-06-27 20:28 UTC (permalink / raw)
To: meta
I recently upgraded a server from 08b649735 to 5860b498a and noticed
that grok-pull didn't bring in any updates. It looks like what's going
on is that the top-level /manifest.js.gz endpoint is now coming up
empty.
A minimal example on 5860b498a:
$ git clone --mirror https://try.public-inbox.org/test test
$ export PI_CONFIG="$PWD/config"
$ public-inbox-init -V1 test test/ \
http://localhost:8080/test test@public-inbox.org
$ public-inbox-index -v test
$ public-inbox-httpd &
$ curl -fsS http://localhost:8080/manifest.js.gz | zcat | jq
{}
On 08b649735, the output is
{
"/test": {
"reference": null,
...
}
}
Bisecting with the script below points to 520be116 (www_listing: start
updating for pagination + search, 2021-06-23).
--8<---------------cut here---------------start------------->8---
#!/bin/sh
perl Makefile.PL && make && sudo make install || exit 125
# Local clone of https://try.public-inbox.org/test
pi_test=/tmp/pi-test
cd "$(mktemp -d "${TMPDIR:-/tmp}"/pi-XXXXXXX)"
export PI_CONFIG="$PWD/config"
git clone --mirror "$pi_test" test
public-inbox-init -V1 test test/ \
http://localhost:8080/test test@public-inbox.org
public-inbox-index -v test
trap 'trap - TERM && pkill -P $$' INT TERM EXIT
public-inbox-httpd &
sleep 1
curl -fsS http://localhost:8080/manifest.js.gz | zcat | jq -e '.["/test"]'
--8<---------------cut here---------------end--------------->8---
^ permalink raw reply [relevance 6%]
* [PATCH] www_listing: fix manifest.js.gz generation with extindex "all"
@ 2021-06-24 12:59 5% Eric Wong
0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2021-06-24 12:59 UTC (permalink / raw)
To: meta
WwwListing and ManifestJsGz may be too different nowadays to
be worth the code sharing between them.
Update some comments and note we still need better tests :x
Fixes: 520be116e8a686cb ("www_listing: start updating for pagination + search")
---
lib/PublicInbox/ManifestJsGz.pm | 26 +++++++++++++++++++-------
lib/PublicInbox/WwwListing.pm | 3 ++-
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index e7bb0e86..9dc10791 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -13,12 +13,11 @@ use HTTP::Date qw(time2str);
my $json = PublicInbox::Config::json();
-# called by WwwListing
sub url_regexp {
my ($ctx) = @_;
# grokmirror uses relative paths, so it's domain-dependent
- # SUPER calls PublicInbox::WwwListing::url_regexp
- $ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
+ # SUPER calls PublicInbox::WwwListing::url_filter
+ ($ctx->url_filter('publicInbox.grokManifest', 'match=domain'))[0];
}
sub inject_entry ($$$;$) {
@@ -29,7 +28,7 @@ sub inject_entry ($$$;$) {
$ctx->{manifest}->{$url_path} = $ent;
}
-sub manifest_add ($$;$$) {
+sub manifest_add ($$;$$) { # slow path w/o extindex "all"
my ($ctx, $ibx, $epoch, $default_desc) = @_;
my $url_path = "/$ibx->{name}";
my $git;
@@ -70,20 +69,33 @@ sub eidx_manifest_add ($$$) {
}
}
+sub response {
+ my ($class, $ctx) = @_;
+ bless $ctx, $class;
+ my ($re, undef) = $ctx->url_filter;
+ $re // return psgi_triple($ctx);
+ my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+ $ctx->can('list_match_i'), $re, $ctx);
+ sub {
+ $ctx->{-wcb} = $_[0]; # HTTP server callback
+ $ctx->{env}->{'pi-httpd.async'} ?
+ $iter->event_step : $iter->each_section;
+ }
+}
+
sub ibx_entry {
my ($ctx, $ibx) = @_;
my $ALL = $ctx->{www}->{pi_cfg}->ALL;
- if ($ALL) {
+ if ($ALL) { # FIXME: test this in t/
eidx_manifest_add($ctx, $ALL, $ibx);
} else {
slow_manifest_add($ctx, $ibx);
+ warn "E: $@" if $@;
}
- warn "E: $@" if $@;
}
sub hide_key { 'manifest' } # for WwwListing->list_match_i
-# overrides WwwListing->psgi_triple
sub psgi_triple {
my ($ctx) = @_;
my $abs2urlpath = delete($ctx->{-abs2urlpath}) // {};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index eb015742..76c76663 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -90,7 +90,7 @@ sub add_misc_ibx { # MiscSearch->retry_reopen callback
$qs = "( $qs ) AND ( $user_query )";
}
my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
- $ctx->{-list} = [];
+ delete $ctx->{-list}; # reset if retried
my $pi_cfg = $ctx->{www}->{pi_cfg};
for my $mi ($mset->items) {
my $doc = $mi->get_document;
@@ -122,6 +122,7 @@ sub response {
my ($re, $qs) = $ctx->url_filter;
$re // return $ctx->psgi_triple;
if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+ # FIXME: test this in t/
$ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
$ctx, $re, $qs);
} else { # slow path, no [extindex "all"] configured
^ permalink raw reply related [relevance 5%]
* [PATCH 2/3] www_listing: start updating for pagination + search
2021-06-23 11:14 7% [PATCH 0/3] some WWW search things Eric Wong
@ 2021-06-23 11:14 4% ` Eric Wong
0 siblings, 0 replies; 6+ results
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
To: meta
When dealing with thousands of inboxes, displaying all of
them on a single page isn't going to work. So steal some
pagination and search results code from the message search
to generate some basic HTML output that looks good in w3m.
---
lib/PublicInbox/Config.pm | 5 ++
lib/PublicInbox/ManifestJsGz.pm | 2 +-
lib/PublicInbox/MiscSearch.pm | 34 +++----
lib/PublicInbox/SearchQuery.pm | 13 +--
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/WwwListing.pm | 155 +++++++++++++++++++++++++++-----
6 files changed, 164 insertions(+), 47 deletions(-)
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 3f0f5a01..36f2fafb 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -97,6 +97,11 @@ sub lookup_ei {
$self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
}
+sub lookup_eidx_key {
+ my ($self, $eidx_key) = @_;
+ _lookup_fill($self, '-by_eidx_key', $eidx_key);
+}
+
# special case for [extindex "all"]
sub ALL { lookup_ei($_[0], 'all') }
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 31cf15dc..e7bb0e86 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -81,7 +81,7 @@ sub ibx_entry {
warn "E: $@" if $@;
}
-sub hide_key { 'manifest' }
+sub hide_key { 'manifest' } # for WwwListing->list_match_i
# overrides WwwListing->psgi_triple
sub psgi_triple {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 4e010453..6b575b0d 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback
$eq->set_query($qr);
my $desc = !$opt->{asc};
my $rel = $opt->{relevance} // 0;
- if ($rel == -1) { # ORDER BY docid/UID
+ if ($rel == -1) { # ORDER BY docid
$eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
$eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
} elsif ($rel) {
@@ -132,6 +132,23 @@ sub ibx_data_once {
}
}
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+ my ($doc) = $_[-1];
+ my $d;
+ my $data = $json->decode($doc->get_data);
+ for (values %$data) {
+ $d = $_->{description} // next;
+ $d =~ s/ \[epoch [0-9]+\]\z// or next;
+ last;
+ }
+ {
+ uidvalidity => int_val($doc, $UIDVALIDITY),
+ -modified => int_val($doc, $MODIFIED),
+ # extract description from manifest.js.gz epoch description
+ description => $d
+ };
+}
+
sub inbox_data {
my ($self, $ibx) = @_;
retry_reopen($self, \&ibx_data_once, $ibx);
@@ -141,20 +158,7 @@ sub ibx_cache_load {
my ($doc, $cache) = @_;
my ($eidx_key) = xap_terms('Q', $doc);
return unless defined($eidx_key); # expired
- my $ce = $cache->{$eidx_key} = {};
- $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
- $ce->{-modified} = int_val($doc, $MODIFIED);
- $ce->{description} = do {
- # extract description from manifest.js.gz epoch description
- my $d;
- my $data = $json->decode($doc->get_data);
- for (values %$data) {
- $d = $_->{description} // next;
- $d =~ s/ \[epoch [0-9]+\]\z// or next;
- last;
- }
- $d;
- }
+ $cache->{$eidx_key} = doc2ibx_cache_ent($doc);
}
sub _nntpd_cache_load { # retry_reopen callback
diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm
index 0f360500..a6b7d843 100644
--- a/lib/PublicInbox/SearchQuery.pm
+++ b/lib/PublicInbox/SearchQuery.pm
@@ -1,7 +1,7 @@
# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# used by PublicInbox::SearchView
+# used by PublicInbox::SearchView and PublicInbox::WwwListing
package PublicInbox::SearchQuery;
use strict;
use v5.10.1;
@@ -32,11 +32,12 @@ sub qs_html {
if (scalar(keys(%override))) {
$self = bless { (%$self, %override) }, ref($self);
}
-
- my $q = uri_escape($self->{'q'}, MID_ESC);
- $q =~ s/%20/+/g; # improve URL readability
- my $qs = "q=$q";
-
+ my $qs = '';
+ if (defined(my $q = $self->{'q'})) {
+ $q = uri_escape($q, MID_ESC);
+ $q =~ s/%20/+/g; # improve URL readability
+ $qs .= "q=$q";
+ }
if (my $o = $self->{o}) { # ignore o == 0
$qs .= "&o=$o";
}
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e13359d5..c0c801b3 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -219,7 +219,7 @@ sub search_nav_top {
$rv .= qq{</pre></form><pre>};
}
-sub search_nav_bot {
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
my ($mset, $q) = @_;
my $total = $mset->get_matches_estimated;
my $l = $q->{l};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index f28eddf1..eb015742 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,7 +5,7 @@
# Used by PublicInbox::WWW
package PublicInbox::WwwListing;
use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
use PublicInbox::Linkify;
use PublicInbox::GzipFilter qw(gzf_maybe);
use PublicInbox::ConfigIter;
@@ -13,18 +13,19 @@ use PublicInbox::WwwStream;
use bytes (); # bytes::length
sub ibx_entry {
- my ($ctx, $ibx) = @_;
- my $mtime = $ibx->modified;
- my $ts = fmt_ts($mtime);
+ my ($ctx, $ibx, $ce) = @_;
+ $ce->{description} //= $ibx->description;
+ my $ts = fmt_ts($ce->{-modified} //= $ibx->modified);
my $url = prurl($ctx->{env}, $ibx->{url});
my $tmp = <<"";
* $ts - $url
- ${\$ibx->description}
+ $ce->{description}
if (defined(my $info_url = $ibx->{infourl})) {
$tmp .= ' ' . prurl($ctx->{env}, $info_url) . "\n";
}
- push @{$ctx->{-list}}, [ $mtime, $tmp ];
+ push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+ $tmp : [ $ce->{-modified}, $tmp ] ));
}
sub list_match_i { # ConfigIter callback
@@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback
}
}
-sub url_regexp {
+sub url_filter {
my ($ctx, $key, $default) = @_;
$key //= 'publicInbox.wwwListing';
$default //= '404';
@@ -50,9 +51,9 @@ again:
if ($v eq 'match=domain') {
my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
$h =~ s/:[0-9]+\z//;
- qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+ (qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
} elsif ($v eq 'all') {
- qr/./;
+ (qr/./, undef);
} elsif ($v eq '404') {
undef;
} else {
@@ -67,22 +68,122 @@ EOF
sub hide_key { 'www' }
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+ my ($misc, $ctx, $re, $qs) = @_;
+ require PublicInbox::SearchQuery;
+ my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp});
+ my $o = $q->{o};
+ my ($asc, $min, $max);
+ if ($o < 0) {
+ $asc = 1;
+ $o = -($o + 1); # so [-1] is the last element, like Perl lists
+ }
+ my $r = $q->{r};
+ my $opt = {
+ offset => $o,
+ asc => $asc,
+ relevance => $r,
+ limit => $q->{l}
+ };
+ $qs .= ' type:inbox';
+ if (my $user_query = $q->{'q'}) {
+ $qs = "( $qs ) AND ( $user_query )";
+ }
+ my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+ $ctx->{-list} = [];
+ my $pi_cfg = $ctx->{www}->{pi_cfg};
+ for my $mi ($mset->items) {
+ my $doc = $mi->get_document;
+ my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+ $eidx_key // next;
+ my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next;
+ next if $ibx->{-hide}->{$ctx->hide_key};
+ grep(/$re/, @{$ibx->{url}}) or next;
+ $ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+ if ($r) { # for descriptions in search_nav_bot
+ my $pct = PublicInbox::Search::get_pct($mi);
+ # only when sorting by relevance, ->items is always
+ # ordered descending:
+ $max //= $pct;
+ $min = $pct;
+ }
+ }
+ if ($r) { # for descriptions in search_nav_bot
+ $q->{-min_pct} = $min;
+ $q->{-max_pct} = $max;
+ }
+ $ctx->{-mset} = $mset;
+ psgi_triple($ctx);
+}
+
sub response {
my ($class, $ctx) = @_;
bless $ctx, $class;
- if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
- $ALL->misc->reopen;
- }
- my $re = $ctx->url_regexp or return $ctx->psgi_triple;
- my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+ my ($re, $qs) = $ctx->url_filter;
+ $re // return $ctx->psgi_triple;
+ if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+ $ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+ $ctx, $re, $qs);
+ } else { # slow path, no [extindex "all"] configured
+ my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
\&list_match_i, $re, $ctx);
- sub {
- $ctx->{-wcb} = $_[0]; # HTTP server callback
- $ctx->{env}->{'pi-httpd.async'} ?
- $iter->event_step : $iter->each_section;
+ sub {
+ $ctx->{-wcb} = $_[0]; # HTTP server callback
+ $ctx->{env}->{'pi-httpd.async'} ?
+ $iter->event_step : $iter->each_section;
+ }
}
}
+sub mset_footer ($$) {
+ my ($ctx, $mset) = @_;
+ # no footer if too few matches
+ return '' if $mset->get_matches_estimated == $mset->size;
+ require PublicInbox::SearchView;
+ PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq});
+}
+
+sub mset_nav_top {
+ my ($ctx, $mset) = @_;
+ my $q = $ctx->{-sq};
+ my $qh = $q->{'q'} // '';
+ utf8::decode($qh);
+ $qh = ascii_html($qh);
+ $qh = qq[\nvalue="$qh"] if $qh ne '';
+ my $rv = <<EOM;
+<form
+action="./"><pre><input
+name=q
+type=text$qh /><input
+type=submit
+value="locate inbox" /></pre></form><pre>
+EOM
+ chomp $rv;
+ if (defined($q->{'q'})) {
+ my $initial_q = $ctx->{-uxs_retried};
+ if (defined $initial_q) {
+ my $rewritten = $q->{'q'};
+ utf8::decode($initial_q);
+ utf8::decode($rewritten);
+ $initial_q = ascii_html($initial_q);
+ $rewritten = ascii_html($rewritten);
+ $rv .= " Warning: Initial query:\n <b>$initial_q</b>\n";
+ $rv .= " returned no results, used:\n";
+ $rv .= " <b>$rewritten</b>\n instead\n\n";
+ }
+ $rv .= 'Search results ordered by [';
+ if ($q->{r}) {
+ my $d = $q->qs_html(r => 0);
+ $rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>};
+ } else {
+ my $d = $q->qs_html(r => 1);
+ $rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>};
+ }
+ $rv .= ']';
+ }
+ $rv .= qq{</pre>};
+}
+
sub psgi_triple {
my ($ctx) = @_;
my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
@@ -90,17 +191,23 @@ sub psgi_triple {
my $gzf = gzf_maybe($h, $ctx->{env});
$gzf->zmore('<html><head><title>' .
'public-inbox listing</title>' .
- '</head><body><pre>');
+ '</head><body>');
my $code = 404;
- if (my $list = $ctx->{-list}) {
+ if (my $list = delete $ctx->{-list}) {
+ my $mset = delete $ctx->{-mset};
$code = 200;
- # sort by ->modified
- @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
+ if ($mset) { # already sorted, so search bar:
+ $gzf->zmore(mset_nav_top($ctx, $mset));
+ } else { # sort config dump by ->modified
+ @$list = map { $_->[1] }
+ sort { $b->[0] <=> $a->[0] } @$list;
+ }
$list = join("\n", @$list);
my $l = PublicInbox::Linkify->new;
- $gzf->zmore($l->to_html($list));
+ $gzf->zmore('<pre>'.$l->to_html($list));
+ $gzf->zmore(mset_footer($ctx, $mset)) if $mset;
} else {
- $gzf->zmore('no inboxes, yet');
+ $gzf->zmore('<pre>no inboxes, yet');
}
my $out = $gzf->zflush('</pre><hr><pre>'.
PublicInbox::WwwStream::code_footer($ctx->{env}) .
^ permalink raw reply related [relevance 4%]
* [PATCH 0/3] some WWW search things
@ 2021-06-23 11:14 7% Eric Wong
2021-06-23 11:14 4% ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
0 siblings, 1 reply; 6+ results
From: Eric Wong @ 2021-06-23 11:14 UTC (permalink / raw)
To: meta
Needed a break from banging my head trying to deal with
lei multi-directional sync semantics (while dealing with
the summertime heat and bug infestations :<)
www_listing ought to be faster when dealing with many inboxes,
more to come on that front.
Eric Wong (3):
search: make xap_terms easier-to-use and use it more
www_listing: start updating for pagination + search
www: do not warn on blank query parameters
lib/PublicInbox/Config.pm | 5 ++
lib/PublicInbox/LeiSearch.pm | 16 ++--
lib/PublicInbox/LeiXSearch.pm | 4 +-
lib/PublicInbox/ManifestJsGz.pm | 2 +-
lib/PublicInbox/MiscSearch.pm | 57 ++++++------
lib/PublicInbox/Search.pm | 22 ++---
lib/PublicInbox/SearchIdx.pm | 5 +-
lib/PublicInbox/SearchQuery.pm | 13 +--
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/WWW.pm | 5 +-
lib/PublicInbox/WwwListing.pm | 155 +++++++++++++++++++++++++++-----
t/lei_store.t | 3 +-
t/psgi_search.t | 4 +
13 files changed, 200 insertions(+), 93 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-6 of 6 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-06-23 11:14 7% [PATCH 0/3] some WWW search things Eric Wong
2021-06-23 11:14 4% ` [PATCH 2/3] www_listing: start updating for pagination + search Eric Wong
2021-06-24 12:59 5% [PATCH] www_listing: fix manifest.js.gz generation with extindex "all" Eric Wong
2021-06-27 20:28 6% empty /manifest.js.gz response as of 520be116 Kyle Meyer
2021-06-28 21:59 5% ` [PATCH] www: fix manifest.js.gz for default publicInbox.grokManifest Eric Wong
2021-06-30 2:37 0% ` Kyle Meyer
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).