about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-06-23 07:14:21 -0400
committer Eric Wong <e@80x24.org> 2021-06-23 19:24:49 +0000
commit 520be116e8a686cb223b48fad1de29201dee45be (patch)
tree a20d7a81be01fd67c6346efe5e06f05be02f58d8
parent 4d594e98063aaad1ce9a90709af7edc5c44a0163 (diff)
download public-inbox-520be116e8a686cb223b48fad1de29201dee45be.tar.gz
When dealing with thousands of inboxes, displaying all of
them on a single page isn't going to work.  So steal some
pagination and search results code from the message search
to generate some basic HTML output that looks good in w3m.
-rw-r--r-- lib/PublicInbox/Config.pm 5
-rw-r--r-- lib/PublicInbox/ManifestJsGz.pm 2
-rw-r--r-- lib/PublicInbox/MiscSearch.pm 34
-rw-r--r-- lib/PublicInbox/SearchQuery.pm 13
-rw-r--r-- lib/PublicInbox/SearchView.pm 2
-rw-r--r-- lib/PublicInbox/WwwListing.pm 155
6 files changed, 164 insertions, 47 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 3f0f5a01..36f2fafb 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -97,6 +97,11 @@ sub lookup_ei {
         $self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
 }
 
+sub lookup_eidx_key {
+        my ($self, $eidx_key) = @_;
+        _lookup_fill($self, '-by_eidx_key', $eidx_key);
+}
+
 # special case for [extindex "all"]
 sub ALL { lookup_ei($_[0], 'all') }
 
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 31cf15dc..e7bb0e86 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -81,7 +81,7 @@ sub ibx_entry {
         warn "E: $@" if $@;
 }
 
-sub hide_key { 'manifest' }
+sub hide_key { 'manifest' } # for WwwListing->list_match_i
 
 # overrides WwwListing->psgi_triple
 sub psgi_triple {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 4e010453..6b575b0d 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback
         $eq->set_query($qr);
         my $desc = !$opt->{asc};
         my $rel = $opt->{relevance} // 0;
-        if ($rel == -1) { # ORDER BY docid/UID
+        if ($rel == -1) { # ORDER BY docid
                 $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
                 $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
         } elsif ($rel) {
@@ -132,6 +132,23 @@ sub ibx_data_once {
         }
 }
 
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+        my ($doc) = $_[-1];
+        my $d;
+        my $data = $json->decode($doc->get_data);
+        for (values %$data) {
+                $d = $_->{description} // next;
+                $d =~ s/ \[epoch [0-9]+\]\z// or next;
+                last;
+        }
+        {
+                uidvalidity => int_val($doc, $UIDVALIDITY),
+                -modified => int_val($doc, $MODIFIED),
+                # extract description from manifest.js.gz epoch description
+                description => $d
+        };
+}
+
 sub inbox_data {
         my ($self, $ibx) = @_;
         retry_reopen($self, \&ibx_data_once, $ibx);
@@ -141,20 +158,7 @@ sub ibx_cache_load {
         my ($doc, $cache) = @_;
         my ($eidx_key) = xap_terms('Q', $doc);
         return unless defined($eidx_key); # expired
-        my $ce = $cache->{$eidx_key} = {};
-        $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
-        $ce->{-modified} = int_val($doc, $MODIFIED);
-        $ce->{description} = do {
-                # extract description from manifest.js.gz epoch description
-                my $d;
-                my $data = $json->decode($doc->get_data);
-                for (values %$data) {
-                        $d = $_->{description} // next;
-                        $d =~ s/ \[epoch [0-9]+\]\z// or next;
-                        last;
-                }
-                $d;
-        }
+        $cache->{$eidx_key} = doc2ibx_cache_ent($doc);
 }
 
 sub _nntpd_cache_load { # retry_reopen callback
diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm
index 0f360500..a6b7d843 100644
--- a/lib/PublicInbox/SearchQuery.pm
+++ b/lib/PublicInbox/SearchQuery.pm
@@ -1,7 +1,7 @@
 # Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# used by PublicInbox::SearchView
+# used by PublicInbox::SearchView and PublicInbox::WwwListing
 package PublicInbox::SearchQuery;
 use strict;
 use v5.10.1;
@@ -32,11 +32,12 @@ sub qs_html {
         if (scalar(keys(%override))) {
                 $self = bless { (%$self, %override) }, ref($self);
         }
-
-        my $q = uri_escape($self->{'q'}, MID_ESC);
-        $q =~ s/%20/+/g; # improve URL readability
-        my $qs = "q=$q";
-
+        my $qs = '';
+        if (defined(my $q = $self->{'q'})) {
+                $q = uri_escape($q, MID_ESC);
+                $q =~ s/%20/+/g; # improve URL readability
+                $qs .= "q=$q";
+        }
         if (my $o = $self->{o}) { # ignore o == 0
                 $qs .= "&amp;o=$o";
         }
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e13359d5..c0c801b3 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -219,7 +219,7 @@ sub search_nav_top {
         $rv .= qq{</pre></form><pre>};
 }
 
-sub search_nav_bot {
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
         my ($mset, $q) = @_;
         my $total = $mset->get_matches_estimated;
         my $l = $q->{l};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index f28eddf1..eb015742 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,7 +5,7 @@
 # Used by PublicInbox::WWW
 package PublicInbox::WwwListing;
 use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::ConfigIter;
@@ -13,18 +13,19 @@ use PublicInbox::WwwStream;
 use bytes (); # bytes::length
 
 sub ibx_entry {
-        my ($ctx, $ibx) = @_;
-        my $mtime = $ibx->modified;
-        my $ts = fmt_ts($mtime);
+        my ($ctx, $ibx, $ce) = @_;
+        $ce->{description} //= $ibx->description;
+        my $ts = fmt_ts($ce->{-modified} //= $ibx->modified);
         my $url = prurl($ctx->{env}, $ibx->{url});
         my $tmp = <<"";
 * $ts - $url
-  ${\$ibx->description}
+  $ce->{description}
 
         if (defined(my $info_url = $ibx->{infourl})) {
                 $tmp .= '  ' . prurl($ctx->{env}, $info_url) . "\n";
         }
-        push @{$ctx->{-list}}, [ $mtime, $tmp ];
+        push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+                                $tmp : [ $ce->{-modified}, $tmp ] ));
 }
 
 sub list_match_i { # ConfigIter callback
@@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback
         }
 }
 
-sub url_regexp {
+sub url_filter {
         my ($ctx, $key, $default) = @_;
         $key //= 'publicInbox.wwwListing';
         $default //= '404';
@@ -50,9 +51,9 @@ again:
         if ($v eq 'match=domain') {
                 my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
                 $h =~ s/:[0-9]+\z//;
-                qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+                (qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
         } elsif ($v eq 'all') {
-                qr/./;
+                (qr/./, undef);
         } elsif ($v eq '404') {
                 undef;
         } else {
@@ -67,22 +68,122 @@ EOF
 
 sub hide_key { 'www' }
 
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+        my ($misc, $ctx, $re, $qs) = @_;
+        require PublicInbox::SearchQuery;
+        my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp});
+        my $o = $q->{o};
+        my ($asc, $min, $max);
+        if ($o < 0) {
+                $asc = 1;
+                $o = -($o + 1); # so [-1] is the last element, like Perl lists
+        }
+        my $r = $q->{r};
+        my $opt = {
+                offset => $o,
+                asc => $asc,
+                relevance => $r,
+                limit => $q->{l}
+        };
+        $qs .= ' type:inbox';
+        if (my $user_query = $q->{'q'}) {
+                $qs = "( $qs ) AND ( $user_query )";
+        }
+        my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+        $ctx->{-list} = [];
+        my $pi_cfg = $ctx->{www}->{pi_cfg};
+        for my $mi ($mset->items) {
+                my $doc = $mi->get_document;
+                my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+                $eidx_key // next;
+                my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next;
+                next if $ibx->{-hide}->{$ctx->hide_key};
+                grep(/$re/, @{$ibx->{url}}) or next;
+                $ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+                if ($r) { # for descriptions in search_nav_bot
+                        my $pct = PublicInbox::Search::get_pct($mi);
+                        # only when sorting by relevance, ->items is always
+                        # ordered descending:
+                        $max //= $pct;
+                        $min = $pct;
+                }
+        }
+        if ($r) { # for descriptions in search_nav_bot
+                $q->{-min_pct} = $min;
+                $q->{-max_pct} = $max;
+        }
+        $ctx->{-mset} = $mset;
+        psgi_triple($ctx);
+}
+
 sub response {
         my ($class, $ctx) = @_;
         bless $ctx, $class;
-        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
-                $ALL->misc->reopen;
-        }
-        my $re = $ctx->url_regexp or return $ctx->psgi_triple;
-        my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+        my ($re, $qs) = $ctx->url_filter;
+        $re // return $ctx->psgi_triple;
+        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+                $ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+                                                $ctx, $re, $qs);
+        } else { # slow path, no [extindex "all"] configured
+                my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
                                                 \&list_match_i, $re, $ctx);
-        sub {
-                $ctx->{-wcb} = $_[0]; # HTTP server callback
-                $ctx->{env}->{'pi-httpd.async'} ?
-                                $iter->event_step : $iter->each_section;
+                sub {
+                        $ctx->{-wcb} = $_[0]; # HTTP server callback
+                        $ctx->{env}->{'pi-httpd.async'} ?
+                                        $iter->event_step : $iter->each_section;
+                }
         }
 }
 
+sub mset_footer ($$) {
+        my ($ctx, $mset) = @_;
+        # no footer if too few matches
+        return '' if $mset->get_matches_estimated == $mset->size;
+        require PublicInbox::SearchView;
+        PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq});
+}
+
+sub mset_nav_top {
+        my ($ctx, $mset) = @_;
+        my $q = $ctx->{-sq};
+        my $qh = $q->{'q'} // '';
+        utf8::decode($qh);
+        $qh = ascii_html($qh);
+        $qh = qq[\nvalue="$qh"] if $qh ne '';
+        my $rv = <<EOM;
+<form
+action="./"><pre><input
+name=q
+type=text$qh /><input
+type=submit
+value="locate inbox" /></pre></form><pre>
+EOM
+        chomp $rv;
+        if (defined($q->{'q'})) {
+                my $initial_q = $ctx->{-uxs_retried};
+                if (defined $initial_q) {
+                        my $rewritten = $q->{'q'};
+                        utf8::decode($initial_q);
+                        utf8::decode($rewritten);
+                        $initial_q = ascii_html($initial_q);
+                        $rewritten = ascii_html($rewritten);
+                        $rv .= " Warning: Initial query:\n <b>$initial_q</b>\n";
+                        $rv .= " returned no results, used:\n";
+                        $rv .= " <b>$rewritten</b>\n instead\n\n";
+                }
+                $rv .= 'Search results ordered by [';
+                if ($q->{r}) {
+                        my $d = $q->qs_html(r => 0);
+                        $rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>};
+                } else {
+                        my $d = $q->qs_html(r => 1);
+                        $rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>};
+                }
+                $rv .= ']';
+        }
+        $rv .= qq{</pre>};
+}
+
 sub psgi_triple {
         my ($ctx) = @_;
         my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
@@ -90,17 +191,23 @@ sub psgi_triple {
         my $gzf = gzf_maybe($h, $ctx->{env});
         $gzf->zmore('<html><head><title>' .
                                 'public-inbox listing</title>' .
-                                '</head><body><pre>');
+                                '</head><body>');
         my $code = 404;
-        if (my $list = $ctx->{-list}) {
+        if (my $list = delete $ctx->{-list}) {
+                my $mset = delete $ctx->{-mset};
                 $code = 200;
-                # sort by ->modified
-                @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
+                if ($mset) { # already sorted, so search bar:
+                        $gzf->zmore(mset_nav_top($ctx, $mset));
+                } else { # sort config dump by ->modified
+                        @$list = map { $_->[1] }
+                                sort { $b->[0] <=> $a->[0] } @$list;
+                }
                 $list = join("\n", @$list);
                 my $l = PublicInbox::Linkify->new;
-                $gzf->zmore($l->to_html($list));
+                $gzf->zmore('<pre>'.$l->to_html($list));
+                $gzf->zmore(mset_footer($ctx, $mset)) if $mset;
         } else {
-                $gzf->zmore('no inboxes, yet');
+                $gzf->zmore('<pre>no inboxes, yet');
         }
         my $out = $gzf->zflush('</pre><hr><pre>'.
                         PublicInbox::WwwStream::code_footer($ctx->{env}) .