From 520be116e8a686cb223b48fad1de29201dee45be Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Jun 2021 07:14:21 -0400 Subject: www_listing: start updating for pagination + search When dealing with thousands of inboxes, displaying all of them on a single page isn't going to work. So steal some pagination and search results code from the message search to generate some basic HTML output that looks good in w3m. --- lib/PublicInbox/Config.pm | 5 ++ lib/PublicInbox/ManifestJsGz.pm | 2 +- lib/PublicInbox/MiscSearch.pm | 34 +++++---- lib/PublicInbox/SearchQuery.pm | 13 ++-- lib/PublicInbox/SearchView.pm | 2 +- lib/PublicInbox/WwwListing.pm | 155 +++++++++++++++++++++++++++++++++------- 6 files changed, 164 insertions(+), 47 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 3f0f5a01..36f2fafb 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -97,6 +97,11 @@ sub lookup_ei { $self->{-ei_by_name}->{$name} //= _fill_ei($self, $name); } +sub lookup_eidx_key { + my ($self, $eidx_key) = @_; + _lookup_fill($self, '-by_eidx_key', $eidx_key); +} + # special case for [extindex "all"] sub ALL { lookup_ei($_[0], 'all') } diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 31cf15dc..e7bb0e86 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -81,7 +81,7 @@ sub ibx_entry { warn "E: $@" if $@; } -sub hide_key { 'manifest' } +sub hide_key { 'manifest' } # for WwwListing->list_match_i # overrides WwwListing->psgi_triple sub psgi_triple { diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm index 4e010453..6b575b0d 100644 --- a/lib/PublicInbox/MiscSearch.pm +++ b/lib/PublicInbox/MiscSearch.pm @@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback $eq->set_query($qr); my $desc = !$opt->{asc}; my $rel = $opt->{relevance} // 0; - if ($rel == -1) { # ORDER BY docid/UID + if ($rel == -1) { # ORDER BY docid 
$eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); } elsif ($rel) { @@ -132,6 +132,23 @@ sub ibx_data_once { } } +sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc) + my ($doc) = $_[-1]; + my $d; + my $data = $json->decode($doc->get_data); + for (values %$data) { + $d = $_->{description} // next; + $d =~ s/ \[epoch [0-9]+\]\z// or next; + last; + } + { + uidvalidity => int_val($doc, $UIDVALIDITY), + -modified => int_val($doc, $MODIFIED), + # extract description from manifest.js.gz epoch description + description => $d + }; +} + sub inbox_data { my ($self, $ibx) = @_; retry_reopen($self, \&ibx_data_once, $ibx); @@ -141,20 +158,7 @@ sub ibx_cache_load { my ($doc, $cache) = @_; my ($eidx_key) = xap_terms('Q', $doc); return unless defined($eidx_key); # expired - my $ce = $cache->{$eidx_key} = {}; - $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY); - $ce->{-modified} = int_val($doc, $MODIFIED); - $ce->{description} = do { - # extract description from manifest.js.gz epoch description - my $d; - my $data = $json->decode($doc->get_data); - for (values %$data) { - $d = $_->{description} // next; - $d =~ s/ \[epoch [0-9]+\]\z// or next; - last; - } - $d; - } + $cache->{$eidx_key} = doc2ibx_cache_ent($doc); } sub _nntpd_cache_load { # retry_reopen callback diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm index 0f360500..a6b7d843 100644 --- a/lib/PublicInbox/SearchQuery.pm +++ b/lib/PublicInbox/SearchQuery.pm @@ -1,7 +1,7 @@ # Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ -# used by PublicInbox::SearchView +# used by PublicInbox::SearchView and PublicInbox::WwwListing package PublicInbox::SearchQuery; use strict; use v5.10.1; @@ -32,11 +32,12 @@ sub qs_html { if (scalar(keys(%override))) { $self = bless { (%$self, %override) }, ref($self); } - - my $q = uri_escape($self->{'q'}, MID_ESC); - $q =~ s/%20/+/g; # improve URL readability - my $qs = "q=$q"; - 
+ my $qs = ''; + if (defined(my $q = $self->{'q'})) { + $q = uri_escape($q, MID_ESC); + $q =~ s/%20/+/g; # improve URL readability + $qs .= "q=$q"; + } if (my $o = $self->{o}) { # ignore o == 0 $qs .= "&o=$o"; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index e13359d5..c0c801b3 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -219,7 +219,7 @@ sub search_nav_top { $rv .= qq{
};
 }
 
-sub search_nav_bot {
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
 	my ($mset, $q) = @_;
 	my $total = $mset->get_matches_estimated;
 	my $l = $q->{l};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index f28eddf1..eb015742 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,7 +5,7 @@
 # Used by PublicInbox::WWW
 package PublicInbox::WwwListing;
 use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::ConfigIter;
@@ -13,18 +13,19 @@ use PublicInbox::WwwStream;
 use bytes (); # bytes::length
 
 sub ibx_entry {
-	my ($ctx, $ibx) = @_;
-	my $mtime = $ibx->modified;
-	my $ts = fmt_ts($mtime);
+	my ($ctx, $ibx, $ce) = @_; # $ce: optional cache entry (see add_misc_ibx)
+	$ce->{description} //= $ibx->description; # fall back to the inbox itself
+	my $ts = fmt_ts($ce->{-modified} //= $ibx->modified); # same fallback
 	my $url = prurl($ctx->{env}, $ibx->{url});
 	my $tmp = <<"";
 * $ts - $url
-  ${\$ibx->description}
+  $ce->{description}
 
 	if (defined(my $info_url = $ibx->{infourl})) {
 		$tmp .= '  ' . prurl($ctx->{env}, $info_url) . "\n";
 	}
-	push @{$ctx->{-list}}, [ $mtime, $tmp ];
+	push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+				$tmp : [ $ce->{-modified}, $tmp ] )); # else keep mtime as sort key
 }
 
 sub list_match_i { # ConfigIter callback
@@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback
 	}
 }
 
-sub url_regexp {
+sub url_filter {
 	my ($ctx, $key, $default) = @_;
 	$key //= 'publicInbox.wwwListing';
 	$default //= '404';
@@ -50,9 +51,9 @@ again:
 	if ($v eq 'match=domain') {
 		my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
 		$h =~ s/:[0-9]+\z//;
-		qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+		(qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
 	} elsif ($v eq 'all') {
-		qr/./;
+		(qr/./, undef);
 	} elsif ($v eq '404') {
 		undef;
 	} else {
@@ -67,22 +68,122 @@ EOF
 
 sub hide_key { 'www' }
 
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+	my ($misc, $ctx, $re, $qs) = @_;
+	require PublicInbox::SearchQuery;
+	my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp}); # -sq is read by mset_nav_top/mset_footer
+	my $o = $q->{o};
+	my ($asc, $min, $max);
+	if ($o < 0) { # negative offset requests ascending order
+		$asc = 1;
+		$o = -($o + 1); # so [-1] is the last element, like Perl lists
+	}
+	my $r = $q->{r};
+	my $opt = {
+		offset => $o,
+		asc => $asc,
+		relevance => $r,
+		limit => $q->{l}
+	};
+	$qs .= ' type:inbox';
+	if (my $user_query = $q->{'q'}) { # a false query (undef, '' or '0') leaves $qs as-is
+		$qs = "( $qs ) AND ( $user_query )";
+	}
+	my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+	$ctx->{-list} = []; # ibx_entry pushes onto this
+	my $pi_cfg = $ctx->{www}->{pi_cfg};
+	for my $mi ($mset->items) {
+		my $doc = $mi->get_document;
+		my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+		$eidx_key // next; # doc has no 'Q' term
+		my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next; # config lookup found nothing
+		next if $ibx->{-hide}->{$ctx->hide_key}; # hidden for this listing's hide_key
+		grep(/$re/, @{$ibx->{url}}) or next; # no URL passes the filter regexp
+		$ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+		if ($r) { # for descriptions in search_nav_bot
+			my $pct = PublicInbox::Search::get_pct($mi);
+			# only when sorting by relevance, ->items is always
+			# ordered descending:
+			$max //= $pct;
+			$min = $pct;
+		}
+	}
+	if ($r) { # for descriptions in search_nav_bot
+		$q->{-min_pct} = $min;
+		$q->{-max_pct} = $max;
+	}
+	$ctx->{-mset} = $mset; # psgi_triple uses this for the search bar and footer
+	psgi_triple($ctx); # its return value is this callback's result
+}
+
 sub response {
 	my ($class, $ctx) = @_;
 	bless $ctx, $class;
-	if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
-		$ALL->misc->reopen;
-	}
-	my $re = $ctx->url_regexp or return $ctx->psgi_triple;
-	my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+	my ($re, $qs) = $ctx->url_filter; # (URL regexp, query-string filter or undef)
+	$re // return $ctx->psgi_triple; # no regexp: nothing to list
+	if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+		$ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+						$ctx, $re, $qs); # extra args for add_misc_ibx
+	} else { # slow path, no [extindex "all"] configured
+		my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
 						\&list_match_i, $re, $ctx);
-	sub {
-		$ctx->{-wcb} = $_[0]; # HTTP server callback
-		$ctx->{env}->{'pi-httpd.async'} ?
-				$iter->event_step : $iter->each_section;
+		sub {
+			$ctx->{-wcb} = $_[0]; # HTTP server callback
+			$ctx->{env}->{'pi-httpd.async'} ?
+					$iter->event_step : $iter->each_section;
+		}
 	}
 }
 
+sub mset_footer ($$) { # footer text for psgi_triple when a search mset exists
+	my ($ctx, $mset) = @_;
+	# no footer if too few matches
+	return '' if $mset->get_matches_estimated == $mset->size; # estimate equals this mset's size
+	require PublicInbox::SearchView; # loaded only when a footer is emitted
+	PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq}); # -sq is set in add_misc_ibx
+}
+
+sub mset_nav_top {
+	my ($ctx, $mset) = @_;
+	my $q = $ctx->{-sq};
+	my $qh = $q->{'q'} // '';
+	utf8::decode($qh);
+	$qh = ascii_html($qh);
+	$qh = qq[\nvalue="$qh"] if $qh ne '';
+	my $rv = <
+EOM
+	chomp $rv;
+	if (defined($q->{'q'})) {
+		my $initial_q = $ctx->{-uxs_retried};
+		if (defined $initial_q) {
+			my $rewritten = $q->{'q'};
+			utf8::decode($initial_q);
+			utf8::decode($rewritten);
+			$initial_q = ascii_html($initial_q);
+			$rewritten = ascii_html($rewritten);
+			$rv .= " Warning: Initial query:\n $initial_q\n";
+			$rv .= " returned no results, used:\n";
+			$rv .= " $rewritten\n instead\n\n";
+		}
+		$rv .= 'Search results ordered by [';
+		if ($q->{r}) {
+			my $d = $q->qs_html(r => 0);
+			$rv .= qq{updated|relevance};
+		} else {
+			my $d = $q->qs_html(r => 1);
+			$rv .= qq{updated|relevance};
+		}
+		$rv .= ']';
+	}
+	$rv .= qq{
}; +} + sub psgi_triple { my ($ctx) = @_; my $h = [ 'Content-Type', 'text/html; charset=UTF-8', @@ -90,17 +191,23 @@ sub psgi_triple { my $gzf = gzf_maybe($h, $ctx->{env}); $gzf->zmore('' . 'public-inbox listing' . - '
');
+				'');
 	my $code = 404;
-	if (my $list = $ctx->{-list}) {
+	if (my $list = delete $ctx->{-list}) {
+		my $mset = delete $ctx->{-mset};
 		$code = 200;
-		# sort by ->modified
-		@$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
+		if ($mset) { # already sorted, so search bar:
+			$gzf->zmore(mset_nav_top($ctx, $mset));
+		} else { # sort config dump by ->modified
+			@$list = map { $_->[1] }
+				sort { $b->[0] <=> $a->[0] } @$list;
+		}
 		$list = join("\n", @$list);
 		my $l = PublicInbox::Linkify->new;
-		$gzf->zmore($l->to_html($list));
+		$gzf->zmore('
'.$l->to_html($list));
+		$gzf->zmore(mset_footer($ctx, $mset)) if $mset;
 	} else {
-		$gzf->zmore('no inboxes, yet');
+		$gzf->zmore('
no inboxes, yet');
 	}
 	my $out = $gzf->zflush('

'.
 			PublicInbox::WwwStream::code_footer($ctx->{env}) .
-- 
cgit v1.2.3-24-ge0c7