about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-04-05 07:53:47 +0000
committer Eric Wong <e@yhbt.net> 2020-04-05 22:06:20 +0000
commit fec19e492eacb10f990091592f423542ab4249bd (patch)
tree 075f20f90ae319b2e30e949c635fa53ff06d47f3 /lib/PublicInbox/SearchIdx.pm
parent b6fc8916a05176ef006b07bba977b59cdf6a0bce (diff)
download public-inbox-fec19e492eacb10f990091592f423542ab4249bd.tar.gz
Using `undef EXPR' like a function call actually frees the heap
memory associated with the scalar, whereas `$sv = undef' or
`$sv = ""' will hold the buffer around until $sv goes out
of scope.

The `sv_set_undef' documentation in the perlapi(1) manpage
explicitly states this:

  The perl equivalent is "$sv = undef;". Note that it doesn't
  free any string buffer, unlike "undef $sv".

I've also confirmed this by reading Dump() output from Devel::Peek.

We'll also inline the old index_body sub in SearchIdx.pm to make
the scope of the scalar more obvious.

This change saves several hundred kB RSS on both -index and
-httpd when hitting large emails with thousands of lines.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 33
1 file changed, 16 insertions, 17 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 89d8bc2b..9a5484e3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -275,22 +275,8 @@ sub index_diff ($$$) {
         index_text($self, join("\n", @xnq), 1, 'XNQ');
 }
 
-sub index_body ($$$) {
-        my ($self, $txt, $doc) = @_;
-        if ($doc) {
-                # does it look like a diff?
-                if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
-                        index_diff($self, $txt, $doc);
-                } else {
-                        index_text($self, $txt, 1, 'XNQ');
-                }
-        } else {
-                index_text($self, $txt, 0, 'XQUOT');
-        }
-}
-
 sub index_xapian { # msg_iter callback
-        my ($part, $depth, @idx) = @{$_[0]};
+        my $part = $_[0]->[0]; # ignore $depth and @idx
         my ($self, $doc) = @{$_[1]};
         my $ct = $part->content_type || 'text/plain';
         my $fn = $part->filename;
@@ -300,11 +286,24 @@ sub index_xapian { # msg_iter callback
 
         my ($s, undef) = msg_part_text($part, $ct);
         defined $s or return;
+        $_[0]->[0] = $part = undef; # free memory
 
         # split off quoted and unquoted blocks:
         my @sections = PublicInbox::MsgIter::split_quotes($s);
-        $part = $s = undef;
-        index_body($self, $_, /\A>/ ? 0 : $doc) for @sections;
+        undef $s; # free memory
+        for my $txt (@sections) {
+                if ($txt =~ /\A>/) {
+                        index_text($self, $txt, 0, 'XQUOT');
+                } else {
+                        # does it look like a diff?
+                        if ($txt =~ /^(?:diff|---|\+\+\+) /ms) {
+                                index_diff($self, $txt, $doc);
+                        } else {
+                                index_text($self, $txt, 1, 'XNQ');
+                        }
+                }
+                undef $txt; # free memory
+        }
 }
 
 sub add_xapian ($$$$) {