From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9DC3A1F46D for ; Fri, 3 Jan 2020 08:46:03 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/6] searchidx: split off index_xapian for msg_iter Date: Fri, 3 Jan 2020 08:45:59 +0000 Message-Id: <20200103084603.8405-3-e@80x24.org> In-Reply-To: <20200103084603.8405-1-e@80x24.org> References: <20200103084603.8405-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This ought to save some memory, but it's probably lost in the noise given the cost of indexing. Regardless, it still reduces the indentation level and makes future changes easier to read. --- lib/PublicInbox/SearchIdx.pm | 54 +++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 4cfbc4aa..5065974c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -285,6 +285,33 @@ sub index_body ($$$) { @$lines = (); } +sub index_xapian { # msg_iter callback + my ($part, $depth, @idx) = @{$_[0]}; + my ($self, $doc) = @{$_[1]}; + my $ct = $part->content_type || 'text/plain'; + my $fn = $part->filename; + if (defined $fn && $fn ne '') { + $self->index_text($fn, 1, 'XFN'); + } + + my ($s, undef) = msg_part_text($part, $ct); + defined $s or return; + + my (@orig, @quot); + my @lines = split(/\n/, $s); + while (defined(my $l = shift @lines)) { + if ($l =~ /^>/) { + $self->index_body(\@orig, $doc) if @orig; + push @quot, $l; + } else { + $self->index_body(\@quot, 0) if @quot; + push @orig, $l; + } + } + $self->index_body(\@quot, 0) if @quot; + $self->index_body(\@orig, $doc) if 
@orig; +} + sub add_xapian ($$$$$) { my ($self, $mime, $num, $oid, $mids, $mid0) = @_; my $smsg = PublicInbox::SearchMsg->new($mime); @@ -303,32 +330,7 @@ sub add_xapian ($$$$$) { $self->index_text($subj, 1, 'S') if $subj; $self->index_users($smsg); - msg_iter($mime, sub { - my ($part, $depth, @idx) = @{$_[0]}; - my $ct = $part->content_type || 'text/plain'; - my $fn = $part->filename; - if (defined $fn && $fn ne '') { - $self->index_text($fn, 1, 'XFN'); - } - - my ($s, undef) = msg_part_text($part, $ct); - defined $s or return; - - my (@orig, @quot); - my @lines = split(/\n/, $s); - while (defined(my $l = shift @lines)) { - if ($l =~ /^>/) { - $self->index_body(\@orig, $doc) if @orig; - push @quot, $l; - } else { - $self->index_body(\@quot, 0) if @quot; - push @orig, $l; - } - } - $self->index_body(\@quot, 0) if @quot; - $self->index_body(\@orig, $doc) if @orig; - }); - + msg_iter($mime, \&index_xapian, [ $self, $doc ]); foreach my $mid (@$mids) { $self->index_text($mid, 1, 'XM');