about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-01-03 08:46:01 +0000
committer Eric Wong <e@80x24.org> 2020-01-04 09:35:00 +0000
commit 8fe6742cd518eaeed79df87e71c267f87bac0e67 (patch)
tree 443fc81451b287823625466ddf177742b40333a1
parent da1ae9ccd829966195bfe59f17e416f218746def (diff)
download public-inbox-8fe6742cd518eaeed79df87e71c267f87bac0e67.tar.gz
We now use the same regexp View::add_text_body uses.
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 28
1 files changed, 8 insertions, 20 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 62e836e0..47537ed4 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -199,12 +199,12 @@ sub index_old_diff_fn {
 }
 
 sub index_diff ($$$) {
-        my ($self, $lines, $doc) = @_;
+        my ($self, $txt, $doc) = @_;
         my %seen;
         my $in_diff;
         my @xnq;
         my $xnq = \@xnq;
-        foreach (@$lines) {
+        foreach (split(/\n/, $txt)) {
                 if ($in_diff && s/^ //) { # diff context
                         index_diff_inc($self, $_, 'XDFCTX', $xnq);
                 } elsif (/^-- $/) { # email signature begins
@@ -278,20 +278,17 @@ sub index_diff ($$$) {
 }
 
 sub index_body ($$$) {
-        my ($self, $lines, $doc) = @_;
-        my $txt = join("\n", @$lines);
+        my ($self, $txt, $doc) = @_;
         if ($doc) {
                 # does it look like a diff?
                 if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
-                        $txt = undef;
-                        index_diff($self, $lines, $doc);
+                        index_diff($self, $txt, $doc);
                 } else {
                         index_text($self, $txt, 1, 'XNQ');
                 }
         } else {
                 index_text($self, $txt, 0, 'XQUOT');
         }
-        @$lines = ();
 }
 
 sub index_xapian { # msg_iter callback
@@ -306,19 +303,10 @@ sub index_xapian { # msg_iter callback
         my ($s, undef) = msg_part_text($part, $ct);
         defined $s or return;
 
-        my (@orig, @quot);
-        my @lines = split(/\n/, $s);
-        while (defined(my $l = shift @lines)) {
-                if ($l =~ /^>/) {
-                        index_body($self, \@orig, $doc) if @orig;
-                        push @quot, $l;
-                } else {
-                        index_body($self, \@quot, 0) if @quot;
-                        push @orig, $l;
-                }
-        }
-        index_body($self, \@quot, 0) if @quot;
-        index_body($self, \@orig, $doc) if @orig;
+        # split off quoted and unquoted blocks:
+        my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s);
+        $part = $s = undef;
+        index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
 }
 
 sub add_xapian ($$$$$$) {