diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Admin.pm | 11 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 3 |
3 files changed, 27 insertions, 1 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 60f4f40d..62ddbe82 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -234,4 +234,15 @@ sub progress_prepare ($) { } } +# same unit factors as git: +sub parse_unsigned ($) { + my ($max_size) = @_; + + $$max_size =~ /\A([0-9]+)([kmg])?\z/i or return; + my ($n, $unit_factor) = ($1, $2 // ''); + my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); + $$max_size = $n * ($u{lc($unit_factor)} // 1); + 1; +} + 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 579b85e3..25118f43 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -64,6 +64,7 @@ sub new { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); + $self->{index_max_size} = $ibx->{index_max_size}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number @@ -572,6 +573,16 @@ sub batch_adjust ($$$$$) { } } +sub too_big ($$$) { + my ($self, $git, $oid) = @_; + my $max_size = $self->{index_max_size} or return; + my (undef, undef, $size) = $git->check($oid); + die "E: bad $oid in $git->{git_dir}\n" if !defined($size); + return if $size <= $max_size; + warn "W: skipping $oid ($size > $max_size)\n"; + 1; +} + # only for v1 sub read_log { my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_; @@ -598,6 +609,7 @@ sub read_log { } next; } + next if too_big($self, $git, $blob); my $mime = do_cat_mail($git, $blob, \$bytes); my $smsg = bless {}, 'PublicInbox::Smsg'; batch_adjust(\$max, $bytes, $batch_cb, $latest, ++$nr); @@ -606,7 +618,7 @@ sub read_log { $add_cb->($self, $mime, $smsg); } elsif ($line =~ /$delmsg/o) { my $blob = $1; - $D{$blob} = 1; + $D{$blob} = 1 unless too_big($self, $git, $blob); } elsif ($line =~ /^commit ($h40)/o) { $latest = $1; $newest ||= $latest; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 12cc1f13..01b8bed6 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -120,6 +120,7 @@ sub new { last_commit => [], # git repo -> commit }; $self->{shards} = count_shards($self) || nproc_shards($creat); + $self->{index_max_size} = $v2ibx->{index_max_size}; bless $self, $class; } @@ -867,6 +868,7 @@ sub atfork_child { sub mark_deleted ($$$$) { my ($self, $sync, $git, $oid) = @_; + return if PublicInbox::SearchIdx::too_big($self, $git, $oid); my $msgref = $git->cat_file($oid); my $mime = PublicInbox::MIME->new($$msgref); my $mids = mids($mime->header_obj); @@ -993,6 +995,7 @@ sub multi_mid_q_push ($$$) { sub reindex_oid ($$$$) { my ($self, $sync, $git, $oid) = @_; + return if PublicInbox::SearchIdx::too_big($self, $git, $oid); my ($num, $mid0, $len); my $msgref = $git->cat_file($oid, \$len); return if $len == 0; # purged |