about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Admin.pm 11
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 14
-rw-r--r-- lib/PublicInbox/V2Writable.pm 3
3 files changed, 27 insertions, 1 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 60f4f40d..62ddbe82 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -234,4 +234,15 @@ sub progress_prepare ($) {
         }
 }
 
+# parse "<digits>[k|m|g]" in place through a scalar ref; same unit factors as git:
+sub parse_unsigned ($) {
+        my ($max_size) = @_; # scalar ref; its referent is overwritten on success
+
+        $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return; # no match: return false, referent untouched
+        my ($n, $unit_factor) = ($1, $2 // ''); # $2 is undef when no suffix was given
+        my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); # 1024-based multipliers
+        $$max_size = $n * ($u{lc($unit_factor)} // 1); # suffix is case-insensitive; none => multiply by 1
+        1; # true signals a successful parse
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 579b85e3..25118f43 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -64,6 +64,7 @@ sub new {
                 $self->{lock_path} = "$inboxdir/ssoma.lock";
                 my $dir = $self->xdir;
                 $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+                $self->{index_max_size} = $ibx->{index_max_size};
         } elsif ($version == 2) {
                 defined $shard or die "shard is required for v2\n";
                 # shard is a number
@@ -572,6 +573,16 @@ sub batch_adjust ($$$$$) {
         }
 }
 
+sub too_big ($$$) { # true (after a warning) when $oid is larger than index_max_size
+        my ($self, $git, $oid) = @_;
+        my $max_size = $self->{index_max_size} or return; # unset or 0: no limit, return false
+        my (undef, undef, $size) = $git->check($oid); # only the third returned value is used
+        die "E: bad $oid in $git->{git_dir}\n" if !defined($size); # no size for $oid is fatal
+        return if $size <= $max_size; # within the limit: false, so callers carry on
+        warn "W: skipping $oid ($size > $max_size)\n";
+        1; # callers use this as "skip this object" (next/return/unless)
+}
+
 # only for v1
 sub read_log {
         my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
@@ -598,6 +609,7 @@ sub read_log {
                                 }
                                 next;
                         }
+                        next if too_big($self, $git, $blob);
                         my $mime = do_cat_mail($git, $blob, \$bytes);
                         my $smsg = bless {}, 'PublicInbox::Smsg';
                         batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr);
@@ -606,7 +618,7 @@ sub read_log {
                         $add_cb->($self, $mime, $smsg);
                 } elsif ($line =~ /$delmsg/o) {
                         my $blob = $1;
-                        $D{$blob} = 1;
+                        $D{$blob} = 1 unless too_big($self, $git, $blob);
                 } elsif ($line =~ /^commit ($h40)/o) {
                         $latest = $1;
                         $newest ||= $latest;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 12cc1f13..01b8bed6 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -120,6 +120,7 @@ sub new {
                 last_commit => [], # git repo -> commit
         };
         $self->{shards} = count_shards($self) || nproc_shards($creat);
+        $self->{index_max_size} = $v2ibx->{index_max_size};
         bless $self, $class;
 }
 
@@ -867,6 +868,7 @@ sub atfork_child {
 
 sub mark_deleted ($$$$) {
         my ($self, $sync, $git, $oid) = @_;
+        return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
         my $msgref = $git->cat_file($oid);
         my $mime = PublicInbox::MIME->new($$msgref);
         my $mids = mids($mime->header_obj);
@@ -993,6 +995,7 @@ sub multi_mid_q_push ($$$) {
 
 sub reindex_oid ($$$$) {
         my ($self, $sync, $git, $oid) = @_;
+        return if PublicInbox::SearchIdx::too_big($self, $git, $oid);
         my ($num, $mid0, $len);
         my $msgref = $git->cat_file($oid, \$len);
         return if $len == 0; # purged