diff options
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- | lib/PublicInbox/ExtSearchIdx.pm | 13 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 18 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 5 |
3 files changed, 21 insertions, 15 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 11f7786d..b0a12bca 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -19,7 +19,8 @@ use v5.10.1; use parent qw(PublicInbox::ExtSearch PublicInbox::Lock); use Carp qw(croak carp); use PublicInbox::Search; -use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor); +use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor + is_bad_blob); use PublicInbox::OverIdx; use PublicInbox::MiscIdx; use PublicInbox::MID qw(mids); @@ -91,16 +92,6 @@ sub attach_config { $cfg->each_inbox(\&_ibx_attach, $self); } -sub is_bad_blob ($$$$) { - my ($oid, $type, $size, $expect_oid) = @_; - if ($type ne 'blob') { - carp "W: $expect_oid is not a blob (type=$type)"; - return 1; - } - croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid; - $size == 0 ? 1 : 0; # size == 0 means purged -} - sub check_batch_limit ($) { my ($req) = @_; my $self = $req->{self}; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 0124dd11..0fbe6560 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -15,7 +15,7 @@ use PublicInbox::InboxWritable; use PublicInbox::MID qw(mids_for_index mids); use PublicInbox::MsgIter; use PublicInbox::IdxStack; -use Carp qw(croak); +use Carp qw(croak carp); use POSIX qw(strftime); use Time::Local qw(timegm); use PublicInbox::OverIdx; @@ -23,7 +23,7 @@ use PublicInbox::Spawn qw(spawn nodatacow_dir); use PublicInbox::Git qw(git_unquote); use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack - index_text term_generator add_val); + index_text term_generator add_val is_bad_blob); my $X = \%PublicInbox::Search::X; our ($DB_CREATE_OR_OPEN, $DB_OPEN); our $DB_NO_SYNC = 0; @@ -591,8 +591,19 @@ sub crlf_adjust ($) { } } +sub is_bad_blob ($$$$) { + my ($oid, $type, $size, $expect_oid) = @_; + if ($type ne 'blob') { + carp "W: $expect_oid is not a blob (type=$type)"; + return 1; + } + croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid; + $size == 0 ? 1 : 0; # size == 0 means purged +} + sub index_both { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; + return if is_bad_blob($oid, $type, $size, $sync->{oid}); my ($nr, $max) = @$sync{qw(nr max)}; ++$$nr; $$max -= $size; @@ -609,6 +620,7 @@ sub index_both { # git->cat_async callback sub unindex_both { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; + return if is_bad_blob($oid, $type, $size, $sync->{oid}); unindex_eml($sync->{sidx}, $oid, PublicInbox::Eml->new($bref)); # may be undef if leftover if (defined(my $cur_cmt = $sync->{cur_cmt})) { @@ -713,7 +725,7 @@ sub process_stack { $sync->{index_oid} = \&index_both; } while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) { - my $arg = { %$sync, cur_cmt => $cur_cmt }; + my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid }; last if $sync->{quit}; if ($f eq 'm') { $arg->{autime} = $at; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 5aec7561..07a7fa42 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -17,7 +17,8 @@ use PublicInbox::InboxWritable; use PublicInbox::OverIdx; use PublicInbox::Msgmap; use PublicInbox::Spawn qw(spawn popen_rd); -use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size); +use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size + is_bad_blob); use IO::Handle; # ->autoflush use File::Temp (); @@ -896,6 +897,7 @@ sub reindex_checkpoint ($$) { sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; + return if is_bad_blob($oid, $type, $size, $arg->{oid}); my $self = $arg->{self}; local $self->{current_info} = "$self->{current_info} $oid"; return if $size == 0; # purged @@ -1147,6 +1149,7 @@ sub unindex_oid_aux ($$$) { sub unindex_oid ($$;$) { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; + return if is_bad_blob($oid, $type, $size, $sync->{oid}); my $self = $sync->{self}; local $self->{current_info} = "$self->{current_info} $oid"; my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef; |