From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4DAB81F453 for ; Fri, 1 Feb 2019 07:50:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] view: simplify quote splitting Date: Fri, 1 Feb 2019 07:50:55 +0000 Message-Id: <20190201075055.23502-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Perl "split" can capture and group in the regexp itself, so rely on that to shorten our code. Comparing the /T/ HTML output of a thread from hell (on LKML with 1356 messages) reveals no difference in the rendered result. Only the HTML source differs in newline placement before/after the closing </span> tag. This allows a minor speedup on my X32 Thinkpad @ 1.6GHz with the aforementioned LKML thread from hell: before: 3.67s after: 3.55s --- lib/PublicInbox/View.pm | 52 ++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 782e668..69aca3d 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -503,12 +503,12 @@ sub flush_quote { # show everything in the full version with anchor from # short version (see above) - my $rv = $l->linkify_1(join('', @$quot)); - @$quot = (); + my $rv = $l->linkify_1($$quot); # we use a here to allow users to specify their own # color for quoted text $rv = $l->linkify_2(ascii_html($rv)); + $$quot = undef; $$s .= qq() . $rv . 
'' } @@ -590,47 +590,35 @@ sub add_text_body { $ctx->{-spfx} = $spfx; }; - my @lines = split(/^/m, $s); + # some editors don't put trailing newlines at the end: + $s .= "\n" unless $s =~ /\n\z/s; + + # split off quoted and unquoted blocks: + my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); $s = ''; if (defined($fn) || $depth > 0 || $err) { # badly-encoded message with $err? tell the world about it! $s .= attach_link($upfx, $ct, $p, $fn, $err); $s .= "\n"; } - my @quot; my $l = PublicInbox::Linkify->new; - foreach my $cur (@lines) { - if ($cur !~ /^>/) { - # show the previously buffered quote inline - flush_quote(\$s, $l, \@quot) if @quot; - - if ($diff) { - push @$diff, $cur; - } else { - # regular line, OK - $l->linkify_1($cur); - $s .= $l->linkify_2(ascii_html($cur)); - } + foreach my $cur (@sections) { + if ($cur =~ /\A>/) { + flush_quote(\$s, $l, \$cur); + } elsif ($diff) { + @$diff = split(/^/m, $cur); + $cur = undef; + flush_diff(\$s, $ctx, $l); } else { - flush_diff(\$s, $ctx, $l) if $diff && @$diff; - push @quot, $cur; + # regular lines, OK + $l->linkify_1($cur); + $s .= $l->linkify_2(ascii_html($cur)); + $cur = undef; } } - if (@quot) { # ugh, top posted - flush_quote(\$s, $l, \@quot); - flush_diff(\$s, $ctx, $l) if $diff && @$diff; - obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; - $s; - } else { - flush_diff(\$s, $ctx, $l) if $diff && @$diff; - obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; - if ($s =~ /\n\z/s) { # common, last line ends with a newline - $s; - } else { # some editors don't do newlines... - $s .= "\n"; - } - } + obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx; + $s; } sub _msg_html_prepare { -- EW