about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-06-23 07:14:21 -0400
committer Eric Wong <e@80x24.org> 2021-06-23 19:24:49 +0000
commit 520be116e8a686cb223b48fad1de29201dee45be (patch)
tree a20d7a81be01fd67c6346efe5e06f05be02f58d8 /lib/PublicInbox
parent 4d594e98063aaad1ce9a90709af7edc5c44a0163 (diff)
download public-inbox-520be116e8a686cb223b48fad1de29201dee45be.tar.gz
When dealing with thousands of inboxes, displaying all of
them on a single page isn't going to work.  So steal some
pagination and search results code from the message search
to generate some basic HTML output that looks good in w3m.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/Config.pm 5
-rw-r--r-- lib/PublicInbox/ManifestJsGz.pm 2
-rw-r--r-- lib/PublicInbox/MiscSearch.pm 34
-rw-r--r-- lib/PublicInbox/SearchQuery.pm 13
-rw-r--r-- lib/PublicInbox/SearchView.pm 2
-rw-r--r-- lib/PublicInbox/WwwListing.pm 155
6 files changed, 164 insertions, 47 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 3f0f5a01..36f2fafb 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -97,6 +97,11 @@ sub lookup_ei {
         $self->{-ei_by_name}->{$name} //= _fill_ei($self, $name);
 }
 
+sub lookup_eidx_key {
+        my ($self, $eidx_key) = @_;
+        _lookup_fill($self, '-by_eidx_key', $eidx_key);
+}
+
 # special case for [extindex "all"]
 sub ALL { lookup_ei($_[0], 'all') }
 
diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm
index 31cf15dc..e7bb0e86 100644
--- a/lib/PublicInbox/ManifestJsGz.pm
+++ b/lib/PublicInbox/ManifestJsGz.pm
@@ -81,7 +81,7 @@ sub ibx_entry {
         warn "E: $@" if $@;
 }
 
-sub hide_key { 'manifest' }
+sub hide_key { 'manifest' } # for WwwListing->list_match_i
 
 # overrides WwwListing->psgi_triple
 sub psgi_triple {
diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm
index 4e010453..6b575b0d 100644
--- a/lib/PublicInbox/MiscSearch.pm
+++ b/lib/PublicInbox/MiscSearch.pm
@@ -59,7 +59,7 @@ sub misc_enquire_once { # retry_reopen callback
         $eq->set_query($qr);
         my $desc = !$opt->{asc};
         my $rel = $opt->{relevance} // 0;
-        if ($rel == -1) { # ORDER BY docid/UID
+        if ($rel == -1) { # ORDER BY docid
                 $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
                 $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
         } elsif ($rel) {
@@ -132,6 +132,23 @@ sub ibx_data_once {
         }
 }
 
+sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc)
+        my ($doc) = $_[-1];
+        my $d;
+        my $data = $json->decode($doc->get_data);
+        for (values %$data) {
+                $d = $_->{description} // next;
+                $d =~ s/ \[epoch [0-9]+\]\z// or next;
+                last;
+        }
+        {
+                uidvalidity => int_val($doc, $UIDVALIDITY),
+                -modified => int_val($doc, $MODIFIED),
+                # extract description from manifest.js.gz epoch description
+                description => $d
+        };
+}
+
 sub inbox_data {
         my ($self, $ibx) = @_;
         retry_reopen($self, \&ibx_data_once, $ibx);
@@ -141,20 +158,7 @@ sub ibx_cache_load {
         my ($doc, $cache) = @_;
         my ($eidx_key) = xap_terms('Q', $doc);
         return unless defined($eidx_key); # expired
-        my $ce = $cache->{$eidx_key} = {};
-        $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY);
-        $ce->{-modified} = int_val($doc, $MODIFIED);
-        $ce->{description} = do {
-                # extract description from manifest.js.gz epoch description
-                my $d;
-                my $data = $json->decode($doc->get_data);
-                for (values %$data) {
-                        $d = $_->{description} // next;
-                        $d =~ s/ \[epoch [0-9]+\]\z// or next;
-                        last;
-                }
-                $d;
-        }
+        $cache->{$eidx_key} = doc2ibx_cache_ent($doc);
 }
 
 sub _nntpd_cache_load { # retry_reopen callback
diff --git a/lib/PublicInbox/SearchQuery.pm b/lib/PublicInbox/SearchQuery.pm
index 0f360500..a6b7d843 100644
--- a/lib/PublicInbox/SearchQuery.pm
+++ b/lib/PublicInbox/SearchQuery.pm
@@ -1,7 +1,7 @@
 # Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# used by PublicInbox::SearchView
+# used by PublicInbox::SearchView and PublicInbox::WwwListing
 package PublicInbox::SearchQuery;
 use strict;
 use v5.10.1;
@@ -32,11 +32,12 @@ sub qs_html {
         if (scalar(keys(%override))) {
                 $self = bless { (%$self, %override) }, ref($self);
         }
-
-        my $q = uri_escape($self->{'q'}, MID_ESC);
-        $q =~ s/%20/+/g; # improve URL readability
-        my $qs = "q=$q";
-
+        my $qs = '';
+        if (defined(my $q = $self->{'q'})) {
+                $q = uri_escape($q, MID_ESC);
+                $q =~ s/%20/+/g; # improve URL readability
+                $qs .= "q=$q";
+        }
         if (my $o = $self->{o}) { # ignore o == 0
                 $qs .= "&amp;o=$o";
         }
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index e13359d5..c0c801b3 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -219,7 +219,7 @@ sub search_nav_top {
         $rv .= qq{</pre></form><pre>};
 }
 
-sub search_nav_bot {
+sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
         my ($mset, $q) = @_;
         my $total = $mset->get_matches_estimated;
         my $l = $q->{l};
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index f28eddf1..eb015742 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -5,7 +5,7 @@
 # Used by PublicInbox::WWW
 package PublicInbox::WwwListing;
 use strict;
-use PublicInbox::Hval qw(prurl fmt_ts);
+use PublicInbox::Hval qw(prurl fmt_ts ascii_html);
 use PublicInbox::Linkify;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::ConfigIter;
@@ -13,18 +13,19 @@ use PublicInbox::WwwStream;
 use bytes (); # bytes::length
 
 sub ibx_entry {
-        my ($ctx, $ibx) = @_;
-        my $mtime = $ibx->modified;
-        my $ts = fmt_ts($mtime);
+        my ($ctx, $ibx, $ce) = @_;
+        $ce->{description} //= $ibx->description;
+        my $ts = fmt_ts($ce->{-modified} //= $ibx->modified);
         my $url = prurl($ctx->{env}, $ibx->{url});
         my $tmp = <<"";
 * $ts - $url
-  ${\$ibx->description}
+  $ce->{description}
 
         if (defined(my $info_url = $ibx->{infourl})) {
                 $tmp .= '  ' . prurl($ctx->{env}, $info_url) . "\n";
         }
-        push @{$ctx->{-list}}, [ $mtime, $tmp ];
+        push(@{$ctx->{-list}}, (scalar(@_) == 3 ? # $misc in use, already sorted
+                                $tmp : [ $ce->{-modified}, $tmp ] ));
 }
 
 sub list_match_i { # ConfigIter callback
@@ -41,7 +42,7 @@ sub list_match_i { # ConfigIter callback
         }
 }
 
-sub url_regexp {
+sub url_filter {
         my ($ctx, $key, $default) = @_;
         $key //= 'publicInbox.wwwListing';
         $default //= '404';
@@ -50,9 +51,9 @@ again:
         if ($v eq 'match=domain') {
                 my $h = $ctx->{env}->{HTTP_HOST} // $ctx->{env}->{SERVER_NAME};
                 $h =~ s/:[0-9]+\z//;
-                qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i;
+                (qr!\A(?:https?:)?//\Q$h\E(?::[0-9]+)?/!i, "url:$h");
         } elsif ($v eq 'all') {
-                qr/./;
+                (qr/./, undef);
         } elsif ($v eq '404') {
                 undef;
         } else {
@@ -67,22 +68,122 @@ EOF
 
 sub hide_key { 'www' }
 
+sub add_misc_ibx { # MiscSearch->retry_reopen callback
+        my ($misc, $ctx, $re, $qs) = @_;
+        require PublicInbox::SearchQuery;
+        my $q = $ctx->{-sq} = PublicInbox::SearchQuery->new($ctx->{qp});
+        my $o = $q->{o};
+        my ($asc, $min, $max);
+        if ($o < 0) {
+                $asc = 1;
+                $o = -($o + 1); # so [-1] is the last element, like Perl lists
+        }
+        my $r = $q->{r};
+        my $opt = {
+                offset => $o,
+                asc => $asc,
+                relevance => $r,
+                limit => $q->{l}
+        };
+        $qs .= ' type:inbox';
+        if (my $user_query = $q->{'q'}) {
+                $qs = "( $qs ) AND ( $user_query )";
+        }
+        my $mset = $misc->mset($qs, $opt); # sorts by $MODIFIED (mtime)
+        $ctx->{-list} = [];
+        my $pi_cfg = $ctx->{www}->{pi_cfg};
+        for my $mi ($mset->items) {
+                my $doc = $mi->get_document;
+                my ($eidx_key) = PublicInbox::Search::xap_terms('Q', $doc);
+                $eidx_key // next;
+                my $ibx = $pi_cfg->lookup_eidx_key($eidx_key) // next;
+                next if $ibx->{-hide}->{$ctx->hide_key};
+                grep(/$re/, @{$ibx->{url}}) or next;
+                $ctx->ibx_entry($ibx, $misc->doc2ibx_cache_ent($doc));
+                if ($r) { # for descriptions in search_nav_bot
+                        my $pct = PublicInbox::Search::get_pct($mi);
+                        # only when sorting by relevance, ->items is always
+                        # ordered descending:
+                        $max //= $pct;
+                        $min = $pct;
+                }
+        }
+        if ($r) { # for descriptions in search_nav_bot
+                $q->{-min_pct} = $min;
+                $q->{-max_pct} = $max;
+        }
+        $ctx->{-mset} = $mset;
+        psgi_triple($ctx);
+}
+
 sub response {
         my ($class, $ctx) = @_;
         bless $ctx, $class;
-        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
-                $ALL->misc->reopen;
-        }
-        my $re = $ctx->url_regexp or return $ctx->psgi_triple;
-        my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
+        my ($re, $qs) = $ctx->url_filter;
+        $re // return $ctx->psgi_triple;
+        if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { # fast path
+                $ALL->misc->reopen->retry_reopen(\&add_misc_ibx,
+                                                $ctx, $re, $qs);
+        } else { # slow path, no [extindex "all"] configured
+                my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg},
                                                 \&list_match_i, $re, $ctx);
-        sub {
-                $ctx->{-wcb} = $_[0]; # HTTP server callback
-                $ctx->{env}->{'pi-httpd.async'} ?
-                                $iter->event_step : $iter->each_section;
+                sub {
+                        $ctx->{-wcb} = $_[0]; # HTTP server callback
+                        $ctx->{env}->{'pi-httpd.async'} ?
+                                        $iter->event_step : $iter->each_section;
+                }
         }
 }
 
+sub mset_footer ($$) {
+        my ($ctx, $mset) = @_;
+        # no footer if too few matches
+        return '' if $mset->get_matches_estimated == $mset->size;
+        require PublicInbox::SearchView;
+        PublicInbox::SearchView::search_nav_bot($mset, $ctx->{-sq});
+}
+
+sub mset_nav_top {
+        my ($ctx, $mset) = @_;
+        my $q = $ctx->{-sq};
+        my $qh = $q->{'q'} // '';
+        utf8::decode($qh);
+        $qh = ascii_html($qh);
+        $qh = qq[\nvalue="$qh"] if $qh ne '';
+        my $rv = <<EOM;
+<form
+action="./"><pre><input
+name=q
+type=text$qh /><input
+type=submit
+value="locate inbox" /></pre></form><pre>
+EOM
+        chomp $rv;
+        if (defined($q->{'q'})) {
+                my $initial_q = $ctx->{-uxs_retried};
+                if (defined $initial_q) {
+                        my $rewritten = $q->{'q'};
+                        utf8::decode($initial_q);
+                        utf8::decode($rewritten);
+                        $initial_q = ascii_html($initial_q);
+                        $rewritten = ascii_html($rewritten);
+                        $rv .= " Warning: Initial query:\n <b>$initial_q</b>\n";
+                        $rv .= " returned no results, used:\n";
+                        $rv .= " <b>$rewritten</b>\n instead\n\n";
+                }
+                $rv .= 'Search results ordered by [';
+                if ($q->{r}) {
+                        my $d = $q->qs_html(r => 0);
+                        $rv .= qq{<a\nhref="?$d">updated</a>|<b>relevance</b>};
+                } else {
+                        my $d = $q->qs_html(r => 1);
+                        $rv .= qq{<b>updated</b>|<a\nhref="?$d">relevance</a>};
+                }
+                $rv .= ']';
+        }
+        $rv .= qq{</pre>};
+}
+
 sub psgi_triple {
         my ($ctx) = @_;
         my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
@@ -90,17 +191,23 @@ sub psgi_triple {
         my $gzf = gzf_maybe($h, $ctx->{env});
         $gzf->zmore('<html><head><title>' .
                                 'public-inbox listing</title>' .
-                                '</head><body><pre>');
+                                '</head><body>');
         my $code = 404;
-        if (my $list = $ctx->{-list}) {
+        if (my $list = delete $ctx->{-list}) {
+                my $mset = delete $ctx->{-mset};
                 $code = 200;
-                # sort by ->modified
-                @$list = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @$list;
+                if ($mset) { # already sorted, so search bar:
+                        $gzf->zmore(mset_nav_top($ctx, $mset));
+                } else { # sort config dump by ->modified
+                        @$list = map { $_->[1] }
+                                sort { $b->[0] <=> $a->[0] } @$list;
+                }
                 $list = join("\n", @$list);
                 my $l = PublicInbox::Linkify->new;
-                $gzf->zmore($l->to_html($list));
+                $gzf->zmore('<pre>'.$l->to_html($list));
+                $gzf->zmore(mset_footer($ctx, $mset)) if $mset;
         } else {
-                $gzf->zmore('no inboxes, yet');
+                $gzf->zmore('<pre>no inboxes, yet');
         }
         my $out = $gzf->zflush('</pre><hr><pre>'.
                         PublicInbox::WwwStream::code_footer($ctx->{env}) .