From 1a02e2d367b71eca9fc8093ce83fcae50873003d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 3 Apr 2020 21:06:20 +0000 Subject: quiet "Complex regular subexpression recursion limit" warnings These seem mostly harmless since Perl will just truncate the match and start a new one on a newline boundary in our case. The only downside is we'd end up with redundant tags in HTML. Limiting the number of lines matched ourselves with `{1,$NUM}' doesn't seem prudent since lines vary in length, so we continue to defer the job of limiting matches to the Perl regexp engine. I've noticed this warning in practice on 100K+ line patches to locale data. --- lib/PublicInbox/MsgIter.pm | 10 ++++++++++ lib/PublicInbox/SearchIdx.pm | 2 +- lib/PublicInbox/View.pm | 2 +- lib/PublicInbox/ViewDiff.pm | 11 +++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index 6c18d2bf..fa25564a 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -71,4 +71,14 @@ sub msg_part_text ($$) { ($s, $err); } +# returns an array of quoted or unquoted sections +sub split_quotes { + # Quiet "Complex regular subexpression recursion limit" warning + # in case an inconsiderate sender quotes 32K of text at once. + # The warning from Perl is harmless for us since our callers can + # tolerate less-than-ideal matches which work within Perl limits. + no warnings 'regexp'; + split(/((?:^>[^\n]*\n)+)/sm, shift); +} + 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index fe00df53..89d8bc2b 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -302,7 +302,7 @@ sub index_xapian { # msg_iter callback defined $s or return; # split off quoted and unquoted blocks: - my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); + my @sections = PublicInbox::MsgIter::split_quotes($s); $part = $s = undef; index_body($self, $_, /\A>/ ? 
0 : $doc) for @sections; } diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index c42654b6..70c10604 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -576,7 +576,7 @@ sub add_text_body { # callback for msg_iter $s .= "\n" unless $s =~ /\n\z/s; # split off quoted and unquoted blocks: - my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); + my @sections = PublicInbox::MsgIter::split_quotes($s); $s = ''; my $rv = $ctx->{obuf}; if (defined($fn) || $depth > 0 || $err) { diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index d22c80b9..5d391a13 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -202,6 +202,17 @@ sub flush_diff ($$$) { $dctx = diff_header($dst, \$x, $ctx, \@top); } elsif ($dctx) { my $after = ''; + + # Quiet "Complex regular subexpression recursion limit" + # warning. Perl will truncate matches upon hitting + # that limit, giving us more (and shorter) scalars than + # would be ideal, but otherwise it's harmless. + # + # We could replace the `+' metacharacter with `{1,100}' + # to limit the matches ourselves to 100, but we can + # let Perl do it for us, quietly. + no warnings 'regexp'; + for my $s (split(/((?:(?:^\+[^\n]*\n)+)| (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { -- cgit v1.2.3-24-ge0c7