From 60fdf3773655ab459dc52d6df8ace6555c903311 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 3 Jan 2020 08:45:59 +0000 Subject: searchidx: split off index_xapian for msg_iter This ought to save some memory, but it's probably lost in the noise given the cost of indexing. Regardless it still reduces the indentation level and makes future changes easier to read. --- lib/PublicInbox/SearchIdx.pm | 54 +++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 26 deletions(-) (limited to 'lib/PublicInbox/SearchIdx.pm') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 4cfbc4aa..5065974c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -285,6 +285,33 @@ sub index_body ($$$) { @$lines = (); } +sub index_xapian { # msg_iter callback + my ($part, $depth, @idx) = @{$_[0]}; + my ($self, $doc) = @{$_[1]}; + my $ct = $part->content_type || 'text/plain'; + my $fn = $part->filename; + if (defined $fn && $fn ne '') { + $self->index_text($fn, 1, 'XFN'); + } + + my ($s, undef) = msg_part_text($part, $ct); + defined $s or return; + + my (@orig, @quot); + my @lines = split(/\n/, $s); + while (defined(my $l = shift @lines)) { + if ($l =~ /^>/) { + $self->index_body(\@orig, $doc) if @orig; + push @quot, $l; + } else { + $self->index_body(\@quot, 0) if @quot; + push @orig, $l; + } + } + $self->index_body(\@quot, 0) if @quot; + $self->index_body(\@orig, $doc) if @orig; +} + sub add_xapian ($$$$$) { my ($self, $mime, $num, $oid, $mids, $mid0) = @_; my $smsg = PublicInbox::SearchMsg->new($mime); @@ -303,32 +330,7 @@ sub add_xapian ($$$$$) { $self->index_text($subj, 1, 'S') if $subj; $self->index_users($smsg); - msg_iter($mime, sub { - my ($part, $depth, @idx) = @{$_[0]}; - my $ct = $part->content_type || 'text/plain'; - my $fn = $part->filename; - if (defined $fn && $fn ne '') { - $self->index_text($fn, 1, 'XFN'); - } - - my ($s, undef) = msg_part_text($part, $ct); - defined $s or return; - - my (@orig, @quot); - my @lines = split(/\n/, $s); - while (defined(my $l = shift @lines)) { - if ($l =~ /^>/) { - $self->index_body(\@orig, $doc) if @orig; - push @quot, $l; - } else { - $self->index_body(\@quot, 0) if @quot; - push @orig, $l; - } - } - $self->index_body(\@quot, 0) if @quot; - $self->index_body(\@orig, $doc) if @orig; - }); - + msg_iter($mime, \&index_xapian, [ $self, $doc ]); foreach my $mid (@$mids) { $self->index_text($mid, 1, 'XM'); -- cgit v1.2.3-24-ge0c7