about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm | 13
-rw-r--r-- lib/PublicInbox/SearchIdx.pm    | 18
-rw-r--r-- lib/PublicInbox/V2Writable.pm   |  5
3 files changed, 21 insertions, 15 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 11f7786d..b0a12bca 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -19,7 +19,8 @@ use v5.10.1;
 use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
 use Carp qw(croak carp);
 use PublicInbox::Search;
-use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor);
+use PublicInbox::SearchIdx qw(crlf_adjust prepare_stack is_ancestor
+        is_bad_blob);
 use PublicInbox::OverIdx;
 use PublicInbox::MiscIdx;
 use PublicInbox::MID qw(mids);
@@ -91,16 +92,6 @@ sub attach_config {
         $cfg->each_inbox(\&_ibx_attach, $self);
 }
 
-sub is_bad_blob ($$$$) {
-        my ($oid, $type, $size, $expect_oid) = @_;
-        if ($type ne 'blob') {
-                carp "W: $expect_oid is not a blob (type=$type)";
-                return 1;
-        }
-        croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid;
-        $size == 0 ? 1 : 0; # size == 0 means purged
-}
-
 sub check_batch_limit ($) {
         my ($req) = @_;
         my $self = $req->{self};
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 0124dd11..0fbe6560 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -15,7 +15,7 @@ use PublicInbox::InboxWritable;
 use PublicInbox::MID qw(mids_for_index mids);
 use PublicInbox::MsgIter;
 use PublicInbox::IdxStack;
-use Carp qw(croak);
+use Carp qw(croak carp);
 use POSIX qw(strftime);
 use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
@@ -23,7 +23,7 @@ use PublicInbox::Spawn qw(spawn nodatacow_dir);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size prepare_stack
-        index_text term_generator add_val);
+        index_text term_generator add_val is_bad_blob);
 my $X = \%PublicInbox::Search::X;
 our ($DB_CREATE_OR_OPEN, $DB_OPEN);
 our $DB_NO_SYNC = 0;
@@ -591,8 +591,19 @@ sub crlf_adjust ($) {
         }
 }
 
+sub is_bad_blob ($$$$) {
+        my ($oid, $type, $size, $expect_oid) = @_;
+        if ($type ne 'blob') {
+                carp "W: $expect_oid is not a blob (type=$type)";
+                return 1;
+        }
+        croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid;
+        $size == 0 ? 1 : 0; # size == 0 means purged
+}
+
 sub index_both { # git->cat_async callback
         my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
         my ($nr, $max) = @$sync{qw(nr max)};
         ++$$nr;
         $$max -= $size;
@@ -609,6 +620,7 @@ sub index_both { # git->cat_async callback
 
 sub unindex_both { # git->cat_async callback
         my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
         unindex_eml($sync->{sidx}, $oid, PublicInbox::Eml->new($bref));
         # may be undef if leftover
         if (defined(my $cur_cmt = $sync->{cur_cmt})) {
@@ -713,7 +725,7 @@ sub process_stack {
                 $sync->{index_oid} = \&index_both;
         }
         while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) {
-                my $arg = { %$sync, cur_cmt => $cur_cmt };
+                my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid };
                 last if $sync->{quit};
                 if ($f eq 'm') {
                         $arg->{autime} = $at;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 5aec7561..07a7fa42 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -17,7 +17,8 @@ use PublicInbox::InboxWritable;
 use PublicInbox::OverIdx;
 use PublicInbox::Msgmap;
 use PublicInbox::Spawn qw(spawn popen_rd);
-use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size);
+use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size
+        is_bad_blob);
 use IO::Handle; # ->autoflush
 use File::Temp ();
 
@@ -896,6 +897,7 @@ sub reindex_checkpoint ($$) {
 
 sub index_oid { # cat_async callback
         my ($bref, $oid, $type, $size, $arg) = @_;
+        return if is_bad_blob($oid, $type, $size, $arg->{oid});
         my $self = $arg->{self};
         local $self->{current_info} = "$self->{current_info} $oid";
         return if $size == 0; # purged
@@ -1147,6 +1149,7 @@ sub unindex_oid_aux ($$$) {
 
 sub unindex_oid ($$;$) { # git->cat_async callback
         my ($bref, $oid, $type, $size, $sync) = @_;
+        return if is_bad_blob($oid, $type, $size, $sync->{oid});
         my $self = $sync->{self};
         local $self->{current_info} = "$self->{current_info} $oid";
         my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;