about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-11-10 03:09:59 +0000
committer Eric Wong <e@80x24.org> 2023-11-10 18:42:09 +0000
commit 363c043a8a3f379a69802fc566112fcd8f1e750c (patch)
tree 81661cb93fca46832223ccb150e1bb1aafc10c96
parent f3133719702954356caa3de4c7c26c667f1094d8 (diff)
download public-inbox-363c043a8a3f379a69802fc566112fcd8f1e750c.tar.gz
This seems like an easy (but WWW-specific) way to get recently
created and recently active topics as suggested by Konstantin.

To do this with Xapian will require new columns and
reindexing; and I'm not sure if the current lei handling of
search results by dumping results to a format readable by common
MUAs would work well with this.  A new TUI may be required...

Suggested-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Link: https://public-inbox.org/meta/20231107-skilled-cobra-of-swiftness-a6ff26@meerkat/
-rw-r--r-- MANIFEST 1
-rw-r--r-- lib/PublicInbox/WWW.pm 15
-rw-r--r-- lib/PublicInbox/WwwAtomStream.pm 11
-rw-r--r-- lib/PublicInbox/WwwStream.pm 1
-rw-r--r-- lib/PublicInbox/WwwTopics.pm 86
-rw-r--r-- t/extindex-psgi.t 8
-rw-r--r-- t/plack.t 10
7 files changed, 122 insertions, 10 deletions
diff --git a/MANIFEST b/MANIFEST
index 51dcffaf..e1c3dc97 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -371,6 +371,7 @@ lib/PublicInbox/WwwListing.pm
 lib/PublicInbox/WwwStatic.pm
 lib/PublicInbox/WwwStream.pm
 lib/PublicInbox/WwwText.pm
+lib/PublicInbox/WwwTopics.pm
 lib/PublicInbox/XapClient.pm
 lib/PublicInbox/XapHelper.pm
 lib/PublicInbox/XapHelperCxx.pm
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index d2bd68ea..6b616bd4 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -101,6 +101,9 @@ sub call {
                 invalid_inbox($ctx, $1) || get_atom($ctx);
         } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
                 invalid_inbox($ctx, $1) || get_new($ctx);
+        } elsif ($path_info =~
+                        m!$INBOX_RE/topics_(new|active)\.(atom|html)\z!o) {
+                get_topics($ctx, $1, $2, $3);
         } elsif ($path_info =~ m!$INBOX_RE/description\z!o) {
                 get_description($ctx, $1);
         } elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
@@ -270,6 +273,13 @@ sub get_new {
         PublicInbox::Feed::new_html($ctx);
 }
 
+# /$INBOX/topics_(new|active).(html|atom)
+sub get_topics {
+        my ($ctx, $ibx_name, $category, $type) = @_;
+        require PublicInbox::WwwTopics;
+        PublicInbox::WwwTopics::response($ctx, $ibx_name, $category, $type);
+}
+
 # /$INBOX/?r=$GIT_COMMIT                 -> HTML only
 sub get_index {
         my ($ctx) = @_;
@@ -338,11 +348,12 @@ sub get_altid_dump {
 }
 
 sub need {
-        my ($ctx, $extra) = @_;
+        my ($ctx, $extra, $upref) = @_;
         require PublicInbox::WwwStream;
+        $upref //= '../';
         PublicInbox::WwwStream::html_oneshot($ctx, 501, <<EOF);
 <pre>$extra is not available for this public-inbox
-<a\nhref="../">Return to index</a></pre>
+<a\nhref="$upref">Return to index</a></pre>
 EOF
 }
 
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 737cc6cb..26b366f5 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -99,15 +99,16 @@ sub atom_header {
                 $base_url .= '?' . $search_q->qs_html(x => undef);
                 $self_url .= '?' . $search_q->qs_html;
                 $page_id = to_uuid("q\n".$query);
+        } elsif (defined(my $cat = $ctx->{topic_category})) {
+                $title = title_tag("$cat topics - ".$ibx->description);
+                $self_url .= "topics_$cat.atom";
         } else {
                 $title = title_tag($ibx->description);
                 $self_url .= 'new.atom';
-                if (defined(my $addr = $ibx->{-primary_address})) {
-                        $page_id = "mailto:$addr";
-                } else {
-                        $page_id = to_uuid($self_url);
-                }
+                my $addr = $ibx->{-primary_address};
+                $page_id = "mailto:$addr" if defined $addr;
         }
+        $page_id //= to_uuid($self_url);
         qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
         qq(<feed\nxmlns="http://www.w3.org/2005/Atom"\n) .
         qq(xmlns:thr="http://purl.org/syndication/thread/1.0">) .
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 4cbdda99..3a1d6edf 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -113,6 +113,7 @@ sub html_top ($) {
                         qq(<a\nid=mirror) .
                         qq(\nhref="${upfx}_/text/mirror/">mirror</a>$code / ).
                         qq(<a\nhref="$atom">Atom feed</a>);
+        $links .= delete($ctx->{-html_more_links}) if $ctx->{-html_more_links};
         if ($ibx->isrch) {
                 my $q_val = delete($ctx->{-q_value_html}) // '';
                 $q_val = qq(\nvalue="$q_val") if $q_val ne '';
diff --git a/lib/PublicInbox/WwwTopics.pm b/lib/PublicInbox/WwwTopics.pm
new file mode 100644
index 00000000..ad85a46d
--- /dev/null
+++ b/lib/PublicInbox/WwwTopics.pm
@@ -0,0 +1,86 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::WwwTopics;
+use v5.12;
+use PublicInbox::Hval qw(ascii_html mid_href fmt_ts);
+
+sub add_topic_html ($$) {
+        my (undef, $smsg) = @_;
+        my $s = ascii_html($smsg->{subject});
+        $s = '(no subject)' if $s eq '';
+        $_[0] .= "\n".fmt_ts($smsg->{'MAX(ds)'} // $smsg->{ds}) .
+                qq{ <a\nhref="}.mid_href($smsg->{mid}).qq{/#r">$s</a>};
+        my $nr = $smsg->{'COUNT(num)'};
+        $_[0] .= " $nr+ messages" if $nr > 1;
+}
+
+# n.b. the `SELECT DISTINCT(tid)' subquery is critical for performance
+# with giant inboxes and extindices
+sub topics_new ($) {
+        $_[0]->do_get(<<EOS);
+SELECT ds,ddd,COUNT(num) FROM over WHERE tid IN
+(SELECT DISTINCT(tid) FROM over WHERE tid > 0 ORDER BY ts DESC LIMIT 200)
+AND +num > 0
+GROUP BY tid
+ORDER BY ds ASC
+EOS
+}
+
+sub topics_active ($) {
+        $_[0]->do_get(<<EOS);
+SELECT ddd,MAX(ds),COUNT(num) FROM over WHERE tid IN
+(SELECT DISTINCT(tid) FROM over WHERE tid > 0 ORDER BY ts DESC LIMIT 200)
+AND +num > 0
+GROUP BY tid
+ORDER BY ds ASC
+EOS
+}
+
+sub topics_i { pop @{$_[0]->{msgs}} }
+
+sub topics_atom { # GET /$INBOX_NAME/topics_(new|active).atom
+        my ($ctx) = @_;
+        require PublicInbox::WwwAtomStream;
+        my ($hdr, $smsg, $val);
+        $_->{ds} //= $_->{'MAX(ds)'} // 0 for @{$ctx->{msgs}};
+        PublicInbox::WwwAtomStream->response($ctx, \&topics_i);
+}
+
+sub topics_html { # GET /$INBOX_NAME/topics_(new|active).html
+        my ($ctx) = @_;
+        require PublicInbox::WwwStream;
+        my $buf = '<pre>';
+        $ctx->{-html_more_links} = qq{\n- recent:[<a
+href="./">subjects (threaded)</a>|};
+
+        if ($ctx->{topic_category} eq 'new') {
+                $ctx->{-html_more_links} .= qq{<b>topics (new)</b>|<a
+href="./topics_active.html">topics (active)</a>]};
+        } else { # topic_category eq "active" - topics with recent replies
+                $ctx->{-html_more_links} .= qq{<a
+href="./topics_new.html">topics (new)</a>|<b>topics (active)</b>]};
+        }
+        # can't use SQL to filter references since our schema wasn't designed
+        # for it, but our SQL sorts by ascending time to favor top-level
+        # messages while our final result (post-references filter) favors
+        # recent messages
+        my $msgs = delete $ctx->{msgs};
+        add_topic_html($buf, pop @$msgs) while scalar(@$msgs);
+        $buf .= '</pre>';
+        PublicInbox::WwwStream::html_oneshot($ctx, 200, $buf);
+}
+
+sub response {
+        my ($ctx, $ibx_name, $category, $type) = @_;
+        my ($ret, $over);
+        $ret = PublicInbox::WWW::invalid_inbox($ctx, $ibx_name) and return $ret;
+        $over = $ctx->{ibx}->over or
+                return PublicInbox::WWW::need($ctx, 'Overview', './');
+        $ctx->{msgs} = $category eq 'new' ? topics_new($over) :
+                        topics_active($over);
+        $ctx->{topic_category} = $category;
+        $type eq 'atom' ? topics_atom($ctx) : topics_html($ctx);
+}
+
+1;
diff --git a/t/extindex-psgi.t b/t/extindex-psgi.t
index f71210a5..896c46ff 100644
--- a/t/extindex-psgi.t
+++ b/t/extindex-psgi.t
@@ -118,6 +118,14 @@ my $client = sub {
         is($res->code, 404, '404 on out-of-range mid2tid query');
         $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m"));
         is($res->code, 404, '404 on cross-thread search');
+
+
+        for my $c (qw(new active)) {
+                $res = $cb->(GET("/m2t/topics_$c.html"));
+                is($res->code, 200, "topics_$c.html on basic v2");
+                $res = $cb->(GET("/all/topics_$c.html"));
+                is($res->code, 200, "topics_$c.html on extindex");
+        }
 };
 test_psgi(sub { $www->call(@_) }, $client);
 %$env = (%$env, TMPDIR => $tmpdir, PI_CONFIG => $pi_config);
diff --git a/t/plack.t b/t/plack.t
index 7f80f488..07cab12a 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -204,9 +204,13 @@ my $c1 = sub {
         my $raw = PublicInbox::Eml->new(\$body);
         is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified');
 
-        $res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz'));
-        is(501, $res->code, '501 when overview missing');
-        like($res->content, qr!\bOverview\b!, 'overview omission noted');
+        for my $u (qw(blah@example.com/t.mbox.gz topics_new.html
+                        topics_active.html)) {
+                $res = $cb->(GET("$pfx/$u"));
+                is(501, $res->code, "501 on /$u when overview missing");
+                like($res->content, qr!\bOverview\b!,
+                        "overview omission noted for /$u");
+        }
 
         # legacy redirects
         for my $t (qw(m f)) {