From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 01E131F4B9 for ; Fri, 3 Jan 2020 08:46:04 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/6] searchidx: simplify quote-splitting in index_body Date: Fri, 3 Jan 2020 08:46:01 +0000 Message-Id: <20200103084603.8405-5-e@80x24.org> In-Reply-To: <20200103084603.8405-1-e@80x24.org> References: <20200103084603.8405-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We now use the same regexp View::add_text_body uses. --- lib/PublicInbox/SearchIdx.pm | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 62e836e0..47537ed4 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -199,12 +199,12 @@ sub index_old_diff_fn { } sub index_diff ($$$) { - my ($self, $lines, $doc) = @_; + my ($self, $txt, $doc) = @_; my %seen; my $in_diff; my @xnq; my $xnq = \@xnq; - foreach (@$lines) { + foreach (split(/\n/, $txt)) { if ($in_diff && s/^ //) { # diff context index_diff_inc($self, $_, 'XDFCTX', $xnq); } elsif (/^-- $/) { # email signature begins @@ -278,20 +278,17 @@ sub index_diff ($$$) { } sub index_body ($$$) { - my ($self, $lines, $doc) = @_; - my $txt = join("\n", @$lines); + my ($self, $txt, $doc) = @_; if ($doc) { # does it look like a diff? if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - $txt = undef; - index_diff($self, $lines, $doc); + index_diff($self, $txt, $doc); } else { index_text($self, $txt, 1, 'XNQ'); } } else { index_text($self, $txt, 0, 'XQUOT'); } - @$lines = (); } sub index_xapian { # msg_iter callback @@ -306,19 +303,10 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; - my (@orig, @quot); - my @lines = split(/\n/, $s); - while (defined(my $l = shift @lines)) { - if ($l =~ /^>/) { - index_body($self, \@orig, $doc) if @orig; - push @quot, $l; - } else { - index_body($self, \@quot, 0) if @quot; - push @orig, $l; - } - } - index_body($self, \@quot, 0) if @quot; - index_body($self, \@orig, $doc) if @orig; + # split off quoted and unquoted blocks: + my @sections = split(/((?:^>[^\n]*\n)+)/sm, $s); + $part = $s = undef; + index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; } sub add_xapian ($$$$$$) {