about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2017-06-14 00:14:47 +0000
committerEric Wong <e@80x24.org>2017-06-14 00:15:45 +0000
commit970eb1fd83b93c790d2faed6bf64a97d6d5fe126 (patch)
tree644162ea4055a4c674e0fd78c89495008bbac12c /lib
parent7eeadcb62729b0efbcb53cd9b7b181897c92cf9a (diff)
downloadpublic-inbox-970eb1fd83b93c790d2faed6bf64a97d6d5fe126.tar.gz
Xapian memory usage is tied to the size of the indexed
text, so take the raw message size into account when
deciding when to flush Xapian data.

More importantly, we now flush Xapian before we have it
buffer beyond our maximum; and we do it unconditionally
to prevent even high priority processes from OOM-ing.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/SearchIdx.pm23
1 files changed, 15 insertions, 8 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 316111bf..30d3fe92 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -20,13 +20,14 @@ use Carp qw(croak);
 use POSIX qw(strftime);
 require PublicInbox::Git;
 
-use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
 use constant {
+        MAX_MID_SIZE => 244, # max term size - 1 in Xapian
         PERM_UMASK => 0,
         OLD_PERM_GROUP => 1,
         OLD_PERM_EVERYBODY => 2,
         PERM_GROUP => 0660,
         PERM_EVERYBODY => 0664,
+        BATCH_BYTES => 1_000_000,
 };
 
 sub new {
@@ -71,7 +72,6 @@ sub _xdb_acquire {
                 require File::Path;
                 _lock_acquire($self);
                 File::Path::mkpath($dir);
-                $self->{batch_size} = 100;
                 $flag = Search::Xapian::DB_CREATE_OR_OPEN;
         }
         $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
@@ -395,6 +395,15 @@ sub index_sync {
         with_umask($self, sub { $self->_index_sync($opts) });
 }
 
+sub batch_adjust ($$$$) {
+        my ($max, $bytes, $batch_cb, $latest) = @_;
+        $$max -= $bytes;
+        if ($$max <= 0) {
+                $$max = BATCH_BYTES;
+                $batch_cb->($latest, 1);
+        }
+}
+
 sub rlog {
         my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
         my $hex = '[a-f0-9]';
@@ -404,23 +413,21 @@ sub rlog {
         my $git = $self->{git};
         my $latest;
         my $bytes;
-        my $max = $self->{batch_size}; # may be undef
+        my $max = BATCH_BYTES;
         local $/ = "\n";
         my $line;
         while (defined($line = <$log>)) {
                 if ($line =~ /$addmsg/o) {
                         my $blob = $1;
                         my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                        batch_adjust(\$max, $bytes, $batch_cb, $latest);
                         $add_cb->($self, $mime, $bytes, $blob);
                 } elsif ($line =~ /$delmsg/o) {
                         my $blob = $1;
-                        my $mime = do_cat_mail($git, $blob) or next;
+                        my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                        batch_adjust(\$max, $bytes, $batch_cb, $latest);
                         $del_cb->($self, $mime);
                 } elsif ($line =~ /^commit ($h40)/o) {
-                        if (defined $max && --$max <= 0) {
-                                $max = $self->{batch_size};
-                                $batch_cb->($latest, 1);
-                        }
                         $latest = $1;
                 }
         }