about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-03-24 10:40:22 +0000
committer Eric Wong <e@80x24.org> 2023-03-25 09:38:04 +0000
commit 2e28cc7edb58b04404a836dffc07d47b1a38ee17 (patch)
tree b921cff677e71df3ff546036fe049c2cf1d594fb /lib/PublicInbox
parent 5161b1ecf102f6190c4ce04436eedcc431ef4ad5 (diff)
download public-inbox-2e28cc7edb58b04404a836dffc07d47b1a38ee17.tar.gz
Having many ->delete_document calls in a transaction still
causes Xapian to eat up a large amount of memory and OOM on my
system.

I may reimplement --prune to avoid blocking ongoing updates, but
this is a simple fix for swapping and OOMs for now.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/CodeSearchIdx.pm 23
1 files changed, 17 insertions, 6 deletions
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 704baa9c..e353f452 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -622,12 +622,21 @@ sub scan_git_dirs ($) {
 
 sub prune_cb { # git->check_async callback
         my ($hex, $type, undef, $self_id) = @_;
-        if ($type ne 'commit') {
-                my ($self, $id) = @$self_id;
-                progress($self, "$hex $type");
-                ++$self->{pruned};
-                $self->{xdb}->delete_document($id);
-        }
+        return if $type eq 'commit';
+        my ($self, $id) = @$self_id;
+        my $len = $self->{xdb}->get_doclength($id);
+        progress($self, "$hex $type (doclength=$len)");
+        ++$self->{pruned};
+        $self->{xdb}->delete_document($id);
+
+        # all math around batch_bytes calculation is pretty fuzzy,
+        # but need a way to regularly flush output to avoid OOM,
+        # so assume the average term + position overhead is the
+        # answer to everything: 42
+        return if ($self->{batch_bytes} -= ($len * 42)) > 0;
+        cidx_ckpoint($self, "[$self->{shard}] $self->{pruned}");
+        $self->{batch_bytes} = $self->{-opt}->{batch_size} //
+                        $PublicInbox::SearchIdx::BATCH_BYTES;
 }
 
 sub shard_prune { # via wq_io_do
@@ -639,6 +648,8 @@ sub shard_prune { # via wq_io_do
         my $cur = $xdb->postlist_begin('Tc');
         my $end = $xdb->postlist_end('Tc');
         my ($id, @cmt, $oid);
+        local $self->{batch_bytes} = $self->{-opt}->{batch_size} //
+                                $PublicInbox::SearchIdx::BATCH_BYTES;
         local $self->{pruned} = 0;
         for (; $cur != $end && !$DO_QUIT; $cur++) {
                 @cmt = xap_terms('Q', $xdb, $id = $cur->get_docid);