user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 05/11] index: simplify main landing page if search-enabled
Date: Thu, 20 Aug 2015 02:57:17 +0000	[thread overview]
Message-ID: <1440039443-27052-5-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1440039443-27052-1-git-send-email-e@80x24.org>

We can display /t/$MESSAGE_ID.html easily with a Xapian search
index, so rely on it instead of trying to display messages inline.
---
 lib/PublicInbox/Feed.pm   | 103 +++++++++++++++++++++++++++++++++++++++-------
 lib/PublicInbox/Search.pm |  15 +++++--
 2 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 8bfd19e..40dfb45 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -9,11 +9,15 @@ use Date::Parse qw(strptime);
 use PublicInbox::Hval;
 use PublicInbox::GitCatFile;
 use PublicInbox::View;
+use PublicInbox::MID qw/mid_clean mid_compressed/;
 use constant {
 	DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
 	MAX_PER_PAGE => 25, # this needs to be tunable
 };
 
+use Encode qw/find_encoding/;
+my $enc_utf8 = find_encoding('UTF-8');
+
 # main function
 sub generate {
 	my ($class, $ctx) = @_;
@@ -55,22 +59,30 @@ sub generate_html_index {
 
 	my $title = $feed_opts->{description} || '';
 	$title = PublicInbox::Hval->new_oneline($title)->as_html;
+	my $atom_url = $feed_opts->{atomurl};
 
 	my $html = "<html><head><title>$title</title>" .
-		'<link rel="alternate" title="Atom feed"' . "\nhref=\"" .
-		$feed_opts->{atomurl} . "\"\ntype=\"application/atom+xml\"/>" .
-		'</head><body>';
+		"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
+		"href=\"$atom_url\"\"\ntype=\"application/atom+xml\"/>" .
+		'</head><body>' . PublicInbox::View::PRE_WRAP;
 
 	my $state;
 	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+	my $topics;
+	my $srch = $ctx->{srch};
+	$srch and $topics = [ [], {} ];
 	my (undef, $last) = each_recent_blob($ctx, sub {
-		my ($path, $commit) = @_;
-		unless (defined $state) {
-			$state = [ $ctx->{srch}, {}, $commit, 0 ];
+		my ($path, $commit, $ts, $u, $subj) = @_;
+		$state ||= [ undef, {}, $commit, 0 ];
+
+		if ($srch) {
+			add_topic($git, $srch, $topics, $path, $ts, $u, $subj);
+		} else {
+			my $mime = do_cat_mail($git, $path) or return 0;
+			$html .=
+			     PublicInbox::View->index_entry($mime, 0, $state);
+			1;
 		}
-		my $mime = do_cat_mail($git, $_[0]) or return 0;
-		$html .= PublicInbox::View->index_entry($mime, 0, $state);
-		1;
 	});
 	Email::Address->purge_cache;
 	$git = undef; # destroy pipes.
@@ -81,6 +93,7 @@ sub generate_html_index {
 		$footer .= "\n" . $list_footer if $list_footer;
 		$footer = "<hr /><pre>$footer</pre>";
 	}
+	dump_topics(\$html, $topics) if $topics;
 	$html .= "$footer</body></html>";
 }
 
@@ -92,6 +105,7 @@ sub nav_footer {
 	my $old_r = $cgi->param('r');
 	my $head = '    ';
 	my $next = '    ';
+	# $state = [ undef, {}, $first_commit, $last_anchor ];
 	my $first = $state->[2];
 	my $anchor = $state->[3];
 
@@ -128,7 +142,8 @@ sub each_recent_blob {
 	# leave us with filenames with spaces in them..
 	my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
 			qw/log --no-notes --no-color --raw -r
-			   --abbrev=16 --abbrev-commit/);
+			   --abbrev=16 --abbrev-commit/,
+			"--format=%h%x00%ct%x00%an%x00%s%x00");
 	push @cmd, $range;
 
 	my $pid = open(my $log, '-|', @cmd) or
@@ -137,26 +152,29 @@ sub each_recent_blob {
 	my $last;
 	my $nr = 0;
 	my ($cur_commit, $first_commit, $last_commit);
-	while (my $line = <$log>) {
+	my ($ts, $subj, $u);
+	while (defined(my $line = <$log>)) {
 		if ($line =~ /$addmsg/o) {
 			my $add = $1;
 			next if $deleted{$add}; # optimization-only
-			$nr += $cb->($add, $cur_commit);
+			$nr += $cb->($add, $cur_commit, $ts, $u, $subj);
 			if ($nr >= $max) {
 				$last = 1;
 				last;
 			}
 		} elsif ($line =~ /$delmsg/o) {
 			$deleted{$1} = 1;
-		} elsif ($line =~ /^commit (${hex}{7,40})/o) {
-			$cur_commit = $1;
-			$first_commit = $1 unless defined $first_commit;
+		} elsif ($line =~ /^${hex}{7,40}/o) {
+			($cur_commit, $ts, $u, $subj) = split("\0", $line);
+			unless (defined $first_commit) {
+				$first_commit = $cur_commit;
+			}
 		}
 	}
 
 	if ($last) {
 		while (my $line = <$log>) {
-			if ($line =~ /^commit (${hex}{7,40})/o) {
+			if ($line =~ /^(${hex}{7,40})/o) {
 				$last_commit = $1;
 				last;
 			}
@@ -279,4 +297,57 @@ sub do_cat_mail {
 	$@ ? undef : $mime;
 }
 
+# add_topic: record one commit as a topic; returns 1 for a new topic, else 0
+sub add_topic {
+	my ($git, $srch, $topics, $path, $ts, $u, $subj) = @_;
+	my ($order, $subjs) = @$topics; # [ \@order, \%count_by_subject ]
+	my $header_obj; # filled lazily, only when the mail has to be read
+
+	# legacy ssoma did not set commit titles based on Subject
+	$subj = $enc_utf8->decode($subj);
+	if ($subj eq 'mda') { # title is literally 'mda': use the mail's Subject
+		my $mime = do_cat_mail($git, $path) or return 0;
+		$header_obj = $mime->header_obj;
+		$subj = mime_header($header_obj, 'Subject');
+	}
+
+	$subj = $srch->subject_normalized($subj); # key used to group a topic
+	if (++$subjs->{$subj} == 1) { # first time this subject is seen
+		unless ($header_obj) { # NOTE(review): count stays bumped on failure
+			my $mime = do_cat_mail($git, $path) or return 0;
+			$header_obj = $mime->header_obj;
+		}
+		my $mid = $header_obj->header_raw('Message-ID');
+		$mid = mid_compressed(mid_clean($mid));
+		$u = $enc_utf8->decode($u);
+		push @$order, [ $mid, $ts, $u, $subj ]; # read back by dump_topics
+		return 1; # new topic recorded
+	}
+	0; # old topic, continue going
+}
+
+sub dump_topics { # append the collected topic list to the string in $$dst
+	my ($dst, $topics) = @_; # $dst: scalar ref; $topics: [ \@order, \%subjs ]
+	my ($order, $subjs) = @$topics;
+	$$dst .= '[No recent topics]' unless (scalar @$order);
+	while (defined(my $info = shift @$order)) { # consumes @$order
+		my ($mid, $ts, $u, $subj) = @$info;
+		my $n = delete $subjs->{$subj}; # times this subject was seen
+		$mid = PublicInbox::Hval->new($mid)->as_href;
+		$subj = PublicInbox::Hval->new($subj)->as_html;
+		$u = PublicInbox::Hval->new($u)->as_html;
+		$$dst .= "<a\nhref=\"t/$mid.html#u\"><b>$subj</b></a>\n- ";
+		$ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+		if ($n == 1) {
+			$$dst .= "created by $u @ $ts UTC\n"
+		} else {
+			# $n isn't the total number of posts on the topic,
+			# just the number of posts in the current "git log"
+			# window, so leave it unlabeled
+			$$dst .= "updated by $u @ $ts UTC ($n)\n"
+		}
+	}
+	$$dst .= '</pre>' # presumably closes the <pre> from PRE_WRAP; confirm
+}
+
 1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b9f283f..c28401b 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -492,14 +492,21 @@ sub merge_threads {
 # normalize subjects so they are suitable as pathnames for URLs
 sub subject_path {
 	my $subj = pop;
-
-	$subj =~ s/\A\s+//;
-	$subj =~ s/\s+\z//;
-	$subj =~ s/$REPLY_RE//igo; # remove reply prefix
+	$subj = subject_normalized($subj);
 	$subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
 	lc($subj);
 }
 
+sub subject_normalized { # canonical subject text used to group topics
+	my $subj = pop; # last argument, so function and method calls both work
+	$subj =~ s/\A\s+//s; # no leading space
+	$subj =~ s/\s+\z//s; # no trailing space
+	$subj =~ s/\s+/ /gs; # no redundant spaces
+	$subj =~ s/\.+\z//; # no trailing '.'; NOTE(review): "a ." keeps a space
+	$subj =~ s/$REPLY_RE//igo; # remove reply prefix
+	$subj; # return the normalized copy
+}
+
 sub do_cat_mail {
 	my ($git, $blob) = @_;
 	my $mime = eval {
-- 
EW


  parent reply	other threads:[~2015-08-20  2:57 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-20  2:57 [PATCH 01/11] feed: remove threading from index Eric Wong
2015-08-20  2:57 ` [PATCH 02/11] feed: move timestamp parsing to view Eric Wong
2015-08-20  2:57 ` [PATCH 03/11] use tables for rendering comment nesting Eric Wong
2015-08-20  2:57 ` [PATCH 04/11] view: avoid nesting <a> tags from auto-linkification Eric Wong
2015-08-20  2:57 ` Eric Wong [this message]
2015-08-20  2:57 ` [PATCH 06/11] search: avoid needless decode Eric Wong
2015-08-20  2:57 ` [PATCH 07/11] search: reject ghosts in all cases Eric Wong
2015-08-20  2:57 ` [PATCH 08/11] view: reduce memory usage when displaying large threads Eric Wong
2015-08-20  2:57 ` [PATCH 09/11] search: bump schema version to 5 for subject_path Eric Wong
2015-08-20  2:57 ` [PATCH 10/11] index: layout fix + title and Atom feed links at top Eric Wong
2015-08-20  2:57 ` [PATCH 11/11] view: do not fold top-level messages in thread Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440039443-27052-5-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).