about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-04-03 21:06:20 +0000
committer Eric Wong <e@yhbt.net> 2020-04-03 21:46:55 +0000
commit 1a02e2d367b71eca9fc8093ce83fcae50873003d (patch)
tree 99012da5753e87dca4293258d5e160d87b217b07 /lib
parent fc92ce8845ac5f09939722537624fa48441f7c0b (diff)
download public-inbox-1a02e2d367b71eca9fc8093ce83fcae50873003d.tar.gz
These "Complex regular subexpression recursion limit" warnings
seem mostly harmless since Perl will just truncate the match and
start a new one on a newline boundary in our case.
The only downside is we'd end up with redundant <span> tags in
HTML.

Limiting the number of lines matched ourselves with `{1,$NUM}'
doesn't seem prudent since lines vary in length, so we continue
to defer the job of limiting matches to the Perl regexp engine.

I've noticed this warning in practice on 100K+ line patches to
locale data.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/MsgIter.pm 10
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 2
-rw-r--r-- lib/PublicInbox/View.pm 2
-rw-r--r-- lib/PublicInbox/ViewDiff.pm 11
4 files changed, 23 insertions, 2 deletions
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index 6c18d2bf..fa25564a 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -71,4 +71,14 @@ sub msg_part_text ($$) {
         ($s, $err);
 }
 
+# returns an array of quoted or unquoted sections
+sub split_quotes {
+        # Quiet "Complex regular subexpression recursion limit" warning
+        # in case an inconsiderate sender quotes 32K of text at once.
+        # The warning from Perl is harmless for us since our callers can
+        # tolerate less-than-ideal matches which work within Perl limits.
+        no warnings 'regexp';
+        split(/((?:^>[^\n]*\n)+)/sm, shift);
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fe00df53..89d8bc2b 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -302,7 +302,7 @@ sub index_xapian { # msg_iter callback
         defined $s or return;
 
         # split off quoted and unquoted blocks:
-        my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
+        my @sections = PublicInbox::MsgIter::split_quotes($s);
         $part = $s = undef;
         index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
 }
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c42654b6..70c10604 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -576,7 +576,7 @@ sub add_text_body { # callback for msg_iter
         $s .= "\n" unless $s =~ /\n\z/s;
 
         # split off quoted and unquoted blocks:
-        my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
+        my @sections = PublicInbox::MsgIter::split_quotes($s);
         $s = '';
         my $rv = $ctx->{obuf};
         if (defined($fn) || $depth > 0 || $err) {
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index d22c80b9..5d391a13 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -202,6 +202,17 @@ sub flush_diff ($$$) {
                         $dctx = diff_header($dst, \$x, $ctx, \@top);
                 } elsif ($dctx) {
                         my $after = '';
+
+                        # Quiet "Complex regular subexpression recursion limit"
+                        # warning.  Perl will truncate matches upon hitting
+                        # that limit, giving us more (and shorter) scalars than
+                        # would be ideal, but otherwise it's harmless.
+                        #
+                        # We could replace the `+' metacharacter with `{1,100}'
+                        # to limit the matches ourselves to 100, but we can
+                        # let Perl do it for us, quietly.
+                        no warnings 'regexp';
+
                         for my $s (split(/((?:(?:^\+[^\n]*\n)+)|
                                         (?:(?:^-[^\n]*\n)+)|
                                         (?:^@@ [^\n]+\n))/xsm, $x)) {