From 7475739ec4e426004619f95f6e48fa07d940a5c0 Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Fri, 1 Feb 2019 03:06:47 +0000
Subject: view: simplify quote splitting

Perl "split" returns the text matched by capturing groups in its
separator regexp, so rely on that to shorten our code.

Comparing the /T/ HTML output of a thread from hell (on LKML with
1356 messages) reveals no difference in the rendered result.
Only the HTML source differs in newline placement before/after
the closing </span>.

This allows a minor speedup on my X32 Thinkpad @ 1.6GHz with
the aforementioned LKML thread from hell:

before: 3.67s
 after: 3.55s
---
 lib/PublicInbox/View.pm | 52 +++++++++++++++++++------------------------------
 1 file changed, 20 insertions(+), 32 deletions(-)

(limited to 'lib')
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 782e6686..69aca3d7 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -503,12 +503,12 @@ sub flush_quote {
 
 	# show everything in the full version with anchor from
 	# short version (see above)
-	my $rv = $l->linkify_1(join('', @$quot));
-	@$quot = ();
+	my $rv = $l->linkify_1($$quot);
 
 	# we use a <span> here to allow users to specify their own
 	# color for quoted text
 	$rv = $l->linkify_2(ascii_html($rv));
+	$$quot = undef;
 	$$s .= qq(<span\nclass="q">) . $rv . '</span>'
 }
 
@@ -590,47 +590,35 @@ sub add_text_body {
 		$ctx->{-spfx} = $spfx;
 	};
 
-	my @lines = split(/^/m, $s);
+	# some editors don't put trailing newlines at the end:
+	$s .= "\n" unless $s =~ /\n\z/s;
+
+	# split off quoted and unquoted blocks:
+	my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
 	$s = '';
 	if (defined($fn) || $depth > 0 || $err) {
 		# badly-encoded message with $err? tell the world about it!
 		$s .= attach_link($upfx, $ct, $p, $fn, $err);
 		$s .= "\n";
 	}
-	my @quot;
 	my $l = PublicInbox::Linkify->new;
-	foreach my $cur (@lines) {
-		if ($cur !~ /^>/) {
-			# show the previously buffered quote inline
-			flush_quote(\$s, $l, \@quot) if @quot;
-
-			if ($diff) {
-				push @$diff, $cur;
-			} else {
-				# regular line, OK
-				$l->linkify_1($cur);
-				$s .= $l->linkify_2(ascii_html($cur));
-			}
+	foreach my $cur (@sections) {
+		if ($cur =~ /\A>/) {
+			flush_quote(\$s, $l, \$cur);
+		} elsif ($diff) {
+			@$diff = split(/^/m, $cur);
+			$cur = undef;
+			flush_diff(\$s, $ctx, $l);
 		} else {
-			flush_diff(\$s, $ctx, $l) if $diff && @$diff;
-			push @quot, $cur;
+			# regular lines, OK
+			$l->linkify_1($cur);
+			$s .= $l->linkify_2(ascii_html($cur));
+			$cur = undef;
 		}
 	}
 
-	if (@quot) { # ugh, top posted
-		flush_quote(\$s, $l, \@quot);
-		flush_diff(\$s, $ctx, $l) if $diff && @$diff;
-		obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
-		$s;
-	} else {
-		flush_diff(\$s, $ctx, $l) if $diff && @$diff;
-		obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
-		if ($s =~ /\n\z/s) { # common, last line ends with a newline
-			$s;
-		} else { # some editors don't do newlines...
-			$s .= "\n";
-		}
-	}
+	obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
+	$s;
 }
 
 sub _msg_html_prepare {
-- 
cgit v1.2.3-24-ge0c7