quiet "Complex regular subexpression recursion limit" warnings

These seem mostly harmless since Perl will just truncate the match and start a new one on a newline boundary in our case. The only downside is we'd end up with redundant <span> tags in HTML. Limiting the number of lines matched ourselves with `{1,$NUM}' doesn't seem prudent since lines vary in length, so we continue to defer the job of limiting matches to the Perl regexp engine. I've noticed this warning in practice on 100K+ line patches to locale data.
author: Eric Wong <e@yhbt.net> 2020-04-03 21:06:20 +0000
committer: Eric Wong <e@yhbt.net> 2020-04-03 21:46:55 +0000
commit: 1a02e2d367b71eca9fc8093ce83fcae50873003d (patch)
tree: 99012da5753e87dca4293258d5e160d87b217b07 /t
parent: fc92ce8845ac5f09939722537624fa48441f7c0b (diff)
download: public-inbox-1a02e2d367b71eca9fc8093ce83fcae50873003d.tar.gz
1 files changed, 30 insertions, 0 deletions
diff --git a/t/msg_iter.t b/t/msg_iter.t
index e33bfc69..d303564f 100644
--- a/t/msg_iter.t
+++ b/t/msg_iter.t
@@ -78,5 +78,35 @@ use_ok('PublicInbox::MsgIter');
                  'got bullet point when X-UNKNOWN assumes UTF-8');
  }
  
+{ # split_quotes: sections must round-trip without warnings (API not finalized)
+        my @warn;
+        local $SIG{__WARN__} = sub { push @warn, [ @_ ] }; # collect warnings for the assertions below
+        my $attr = "So and so wrote:\n";
+        my $q = "> hello world\n" x 10;
+        my $nq = "hello world\n" x 10;
+        my @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq);
+        is($sections[0], $attr, 'attribution matches');
+        is($sections[1], $q, 'quoted section matches');
+        is($sections[2], $nq, 'non-quoted section matches');
+        is(scalar(@sections), 3, 'only three sections for short message');
+        is_deeply(\@warn, [], 'no warnings');
+
+        $q x= 3300; # 33000 quoted lines; presumably enough to hit the regexp recursion limit
+        $nq x= 3300; # 33000 non-quoted lines
+        @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq);
+        is_deeply(\@warn, [], 'no warnings on giant message');
+        is(join('', @sections), $attr . $q . $nq, 'result matches expected');
+        is(shift(@sections), $attr, 'attribution is first section');
+        my @check = ('', ''); # [0] rejoins quoted sections, [1] rejoins non-quoted ones
+        while (defined(my $l = shift @sections)) {
+                next if $l eq '';
+                like($l, qr/\n\z/s, 'section ends with newline');
+                my $idx = ($l =~ /\A>/) ? 0 : 1; # a leading '>' marks a quoted section
+                $check[$idx] .= $l;
+        }
+        is($check[0], $q, 'long quoted section matches');
+        is($check[1], $nq, 'long non-quoted section matches');
+}
+
  done_testing();
  1;
author	Eric Wong <e@yhbt.net>	2020-04-03 21:06:20 +0000
committer	Eric Wong <e@yhbt.net>	2020-04-03 21:46:55 +0000
commit	1a02e2d367b71eca9fc8093ce83fcae50873003d (patch)
tree	99012da5753e87dca4293258d5e160d87b217b07 /t
parent	fc92ce8845ac5f09939722537624fa48441f7c0b (diff)
download	public-inbox-1a02e2d367b71eca9fc8093ce83fcae50873003d.tar.gz