diff options
author | Eric Wong <e@80x24.org> | 2021-06-23 07:14:21 -0400 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2021-06-23 19:24:49 +0000 |
commit | 520be116e8a686cb223b48fad1de29201dee45be (patch) | |
tree | a20d7a81be01fd67c6346efe5e06f05be02f58d8 | |
parent | 4d594e98063aaad1ce9a90709af7edc5c44a0163 (diff) | |
download | public-inbox-520be116e8a686cb223b48fad1de29201dee45be.tar.gz |
When dealing with thousands of inboxes, displaying all of them on a single page isn't going to work. So steal some pagination and search results code from the message search to generate some basic HTML output that looks good in w3m.
-rw-r--r-- | lib/PublicInbox/Config.pm | 5 | ||||
-rw-r--r-- | lib/PublicInbox/ManifestJsGz.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/MiscSearch.pm | 34 | ||||
-rw-r--r-- | lib/PublicInbox/SearchQuery.pm | 13 | ||||
-rw-r--r-- | lib/PublicInbox/SearchView.pm | 2 | ||||
-rw-r--r-- | lib/PublicInbox/WwwListing.pm | 155 |
6 files changed, 164 insertions, 47 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 3f0f5a01..36f2fafb 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -97,6 +97,11 @@ sub lookup_ei { $self->{-ei_by_name}->{$name} //= _fill_ei($self, $name); } +sub lookup_eidx_key { + my ($self, $eidx_key) = @_; + _lookup_fill($self, '-by_eidx_key', $eidx_key); +} + # special case for [extindex "all"] sub ALL { lookup_ei($_[0], 'all') } diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 31cf15dc..e7bb0e86 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -81,7 +81,7 @@ sub ibx_entry { warn "E: $@" if $@; } -sub hide_key { 'manifest' } +sub hide_key { 'manifest' } # for WwwListing->list_match_i # overrides WwwListing->psgi_triple sub psgi_triple { diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm index 4e010453..6b575b0d 100644 --- a/lib/PublicInbox/MiscSearch.pm +++ b/lib/PublicInbox/MiscSearch.pm @@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback $eq->set_query($qr); my $desc = !$opt->{asc}; my $rel = $opt->{relevance} // 0; - if ($rel == -1) { # ORDER BY docid/UID + if ($rel == -1) { # ORDER BY docid $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); } elsif ($rel) { @@ -132,6 +132,23 @@ sub ibx_data_once { } } +sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc) + my ($doc) = $_[-1]; + my $d; + my $data = $json->decode($doc->get_data); + for (values %$data) { + $d = $_->{description} // next; + $d =~ s/ \[epoch [0-9]+\]\z// or next; + last; + } + { + uidvalidity => int_val($doc, $UIDVALIDITY), + -modified => int_val($doc, $MODIFIED), + # extract description from manifest.js.gz epoch description + description => $d + }; +} + sub inbox_data { my ($self, $ibx) = @_; retry_reopen($self, \&ibx_data_once, $ibx); @@ -141,20 +158,7 @@ sub ibx_cache_load { my ($doc, $cache) = @_; my ($eidx_key) = xap_terms('Q', $doc); return unless defined($eidx_key); # expired - my $ce = $cache->{$eidx_key} = {}; - $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY); - $ce->{-modified} = int_val($doc, $MODIFIED); - $ce->{description} = do { - # extract description from manifest.js.gz epoch description - my $d; - my $data = $json->decode($doc->get_data); - for (values %$data) { - $d = $_->{description} // next; - $d =~ s/ \[epoch [0-9]+\]\z// or next; - last; - } - $d; - } + $cache->{$eidx_key} = doc2ibx_cache_ent($doc); } sub _nntpd_cache_load { # retry_reopen callback diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm index 0f360500..a6b7d843 100644 --- a/lib/PublicInbox/SearchQuery.pm +++ b/lib/PublicInbox/SearchQuery.pm @@ -1,7 +1,7 @@ # Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# used by PublicInbox::SearchView +# used by PublicInbox::SearchView and PublicInbox::WwwListing package PublicInbox::SearchQuery; use strict; use v5.10.1; @@ -32,11 +32,12 @@ sub qs_html { if (scalar(keys(%override))) { $self = bless { (%$self, %override) }, ref($self); } - - my $q = uri_escape($self->{'q'}, MID_ESC); - $q =~ s/%20/+/g; # improve URL readability - my $qs = "q=$q"; - + my $qs = ''; + if (defined(my $q = $self->{'q'})) { + $q = uri_escape($q, MID_ESC); + $q =~ s/%20/+/g; # improve URL readability + $qs .= "q=$q"; + } if (my $o = $self->{o}) { # ignore o == 0 $qs .= "&o=$o"; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index e13359d5..c0c801b3 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -219,7 +219,7 @@ sub search_nav_top { $rv .= qq{</pre></form><pre>}; } -sub search_nav_bot { +sub search_nav_bot { # also used by WwwListing for searching extindex miscidx my ($mset, $q) = @_; my $total = $mset->get_matches_estimated; my $l = $q->{l}; diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index f28eddf1..eb015742 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -5,7 +5,7 @@ # Used by PublicInbox::WWW package PublicInbox::WwwListing; use strict; -use PublicInbox::Hval qw(prurl fmt_ts); +use PublicInbox::Hval qw(prurl fmt_ts ascii_html); use PublicInbox::Linkify; use PublicInbox::GzipFilter qw(gzf_maybe); use PublicInbox::ConfigIter; @@ -13,18 +13,19 @@ use PublicInbox::WwwStream; use bytes (); # bytes::length sub ibx_entry { - my ($ctx, $ibx) = @_; - my $mtime = $ibx->modified; - my $ts = fmt_ts($mtime); + my ($ctx, $ibx, $ce) = @_; + $ce->{description} //= $ibx->description; + my $ts = fmt_ts($ce->{-modified} //= $ibx->modified); my $url = prurl($ctx->{env}, $ibx->{url}); my $tmp = <<""; * $ts - $url - ${\$ibx->description} + $ce->{description} if (defined(my $info_url = $ibx->{infourl})) { $tmp .= ' ' . prurl($ctx->{env}, $info_url) . "\n"; } - push @{$ctx->{-list}}, [ $mtime, $tmp ]; + push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted + $tmp : [ $ce->{-modified}, $tmp ] )); } sub list_match_i { # ConfigIter callback @@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback } } -sub url_regexp { +sub url_filter { my ($ctx, $key, $default) = @_; $key //= 'publicInbox.wwwListing'; $default //= '404'; @@ -50,9 +51,9 @@ again: if ($v eq 'match=domain') { my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME}; $h =~ s/:[0-9]+\z//; - qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i; + (qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h"); } elsif ($v eq 'all') { - qr/./; + (qr/./, undef); } elsif ($v eq '404') { undef; } else { @@ -67,22 +68,122 @@ EOF sub hide_key { 'www' } +sub add_misc_ibx { # MiscSearch->retry_reopen callback + my ($misc, $ctx, $re, $qs) = @_; + require PublicInbox::SearchQuery; + my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp}); + my $o = $q->{o}; + my ($asc, $min, $max); + if ($o < 0) { + $asc = 1; + $o = -($o + 1); # so [-1] is the last element, like Perl lists + } + my $r = $q->{r}; + my $opt = { + offset => $o, + asc => $asc, + relevance => $r, + limit => $q->{l} + }; + $qs .= ' type:inbox'; + if (my $user_query = $q->{'q'}) { + $qs = "( $qs ) AND ( $user_query )"; + } + my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime) + $ctx->{-list} = []; + my $pi_cfg = $ctx->{www}->{pi_cfg}; + for my $mi ($mset->items) { + my $doc = $mi->get_document; + my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc); + $eidx_key // next; + my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next; + next if $ibx->{-hide}->{$ctx->hide_key}; + grep(/$re/, @{$ibx->{url}}) or next; + $ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc)); + if ($r) { # for descriptions in search_nav_bot + my $pct = PublicInbox::Search::get_pct($mi); + # only when sorting by relevance, ->items is always + # ordered descending: + $max //= $pct; + $min = $pct; + } + } + if ($r) { # for descriptions in search_nav_bot + $q->{-min_pct} = $min; + $q->{-max_pct} = $max; + } + $ctx->{-mset} = $mset; + psgi_triple($ctx); +} + sub response { my ($class, $ctx) = @_; bless $ctx, $class; - if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { - $ALL->misc->reopen; - } - my $re = $ctx->url_regexp or return $ctx->psgi_triple; - my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg}, + my ($re, $qs) = $ctx->url_filter; + $re // return $ctx->psgi_triple; + if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path + $ALL->misc->reopen->retry_reopen(\&add_misc_ibx, + $ctx, $re, $qs); + } else { # slow path, no [extindex "all"] configured + my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg}, \&list_match_i, $re, $ctx); - sub { - $ctx->{-wcb} = $_[0]; # HTTP server callback - $ctx->{env}->{'pi-httpd.async'} ? - $iter->event_step : $iter->each_section; + sub { + $ctx->{-wcb} = $_[0]; # HTTP server callback + $ctx->{env}->{'pi-httpd.async'} ? + $iter->event_step : $iter->each_section; + } } } +sub mset_footer ($$) { + my ($ctx, $mset) = @_; + # no footer if too few matches + return '' if $mset->get_matches_estimated == $mset->size; + require PublicInbox::SearchView; + PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq}); +} + +sub mset_nav_top { + my ($ctx, $mset) = @_; + my $q = $ctx->{-sq}; + my $qh = $q->{'q'} // ''; + utf8::decode($qh); + $qh = ascii_html($qh); + $qh = qq[\nvalue="$qh"] if $qh ne ''; + my $rv = <<EOM; +<form +action="./"><pre><input +name=q +type=text$qh /><input +type=submit +value="locate inbox" /></pre></form><pre> +EOM + chomp $rv; + if (defined($q->{'q'})) { + my $initial_q = $ctx->{-uxs_retried}; + if (defined $initial_q) { + my $rewritten = $q->{'q'}; + utf8::decode($initial_q); + utf8::decode($rewritten); + $initial_q = ascii_html($initial_q); + $rewritten = ascii_html($rewritten); + $rv .= " Warning: Initial query:\n <b>$initial_q</b>\n"; + $rv .= " returned no results, used:\n"; + $rv .= " <b>$rewritten</b>\n instead\n\n"; + } + $rv .= 'Search results ordered by ['; + if ($q->{r}) { + my $d = $q->qs_html(r => 0); + $rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>}; + } else { + my $d = $q->qs_html(r => 1); + $rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>}; + } + $rv .= ']'; + } + $rv .= qq{</pre>}; +} + sub psgi_triple { my ($ctx) = @_; my $h = [ 'Content-Type', 'text/html; charset=UTF-8', @@ -90,17 +191,23 @@ sub psgi_triple { my $gzf = gzf_maybe($h, $ctx->{env}); $gzf->zmore('<html><head><title>' . 'public-inbox listing</title>' . - '</head><body><pre>'); + '</head><body>'); my $code = 404; - if (my $list = $ctx->{-list}) { + if (my $list = delete $ctx->{-list}) { + my $mset = delete $ctx->{-mset}; $code = 200; - # sort by ->modified - @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list; + if ($mset) { # already sorted, so search bar: + $gzf->zmore(mset_nav_top($ctx, $mset)); + } else { # sort config dump by ->modified + @$list = map { $_->[1] } + sort { $b->[0] <=> $a->[0] } @$list; + } $list = join("\n", @$list); my $l = PublicInbox::Linkify->new; - $gzf->zmore($l->to_html($list)); + $gzf->zmore('<pre>'.$l->to_html($list)); + $gzf->zmore(mset_footer($ctx, $mset)) if $mset; } else { - $gzf->zmore('no inboxes, yet'); + $gzf->zmore('<pre>no inboxes, yet'); } my $out = $gzf->zflush('</pre><hr><pre>'. PublicInbox::WwwStream::code_footer($ctx->{env}) . |