From fec19e492eacb10f990091592f423542ab4249bd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Apr 2020 07:53:47 +0000 Subject: release large (non ref) scalars using `undef $sv' Using `undef EXPR' like a function call actually frees the heap memory associated with the scalar, whereas `$sv = undef' or `$sv = ""' will hold the buffer around until $sv goes out of scope. The `sv_set_undef' documentation in the perlapi(1) manpage explicitly states this: The perl equivalent is "$sv = undef;". Note that it doesn't free any string buffer, unlike "undef $sv". And I've confirmed by reading Dump() output from Devel::Peek. We'll also inline the old index_body sub in SearchIdx.pm to make the scope of the scalar more obvious. This change saves several hundred kB RSS on both -index and -httpd when hitting large emails with thousands of lines. --- lib/PublicInbox/SearchIdx.pm | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'lib/PublicInbox/SearchIdx.pm') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 89d8bc2b..9a5484e3 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -275,22 +275,8 @@ sub index_diff ($$$) { index_text($self, join("\n", @xnq), 1, 'XNQ'); } -sub index_body ($$$) { - my ($self, $txt, $doc) = @_; - if ($doc) { - # does it look like a diff? - if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { - index_diff($self, $txt, $doc); - } else { - index_text($self, $txt, 1, 'XNQ'); - } - } else { - index_text($self, $txt, 0, 'XQUOT'); - } -} - sub index_xapian { # msg_iter callback - my ($part, $depth, @idx) = @{$_[0]}; + my $part = $_[0]->[0]; # ignore $depth and @idx my ($self, $doc) = @{$_[1]}; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; @@ -300,11 +286,24 @@ sub index_xapian { # msg_iter callback my ($s, undef) = msg_part_text($part, $ct); defined $s or return; + $_[0]->[0] = $part = undef; # free memory # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); - $part = $s = undef; - index_body($self, $_, /\A>/ ? 0 : $doc) for @sections; + undef $s; # free memory + for my $txt (@sections) { + if ($txt =~ /\A>/) { + index_text($self, $txt, 0, 'XQUOT'); + } else { + # does it look like a diff? + if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { + index_diff($self, $txt, $doc); + } else { + index_text($self, $txt, 1, 'XNQ'); + } + } + undef $txt; # free memory + } } sub add_xapian ($$$$) { -- cgit v1.2.3-24-ge0c7