about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-07-24 05:56:04 +0000
committer Eric Wong <e@yhbt.net> 2020-07-25 20:48:18 +0000
commit edcf14a75af994821e624c42e3de31079b2ae70a (patch)
tree 287b243f112a984222085e025934598fec5b4303 /lib/PublicInbox/SearchIdx.pm
parent 95e35e1f546dfec0294380e958ae3b4f4598ce03 (diff)
download public-inbox-edcf14a75af994821e624c42e3de31079b2ae70a.tar.gz
This allows v1 indexing to run while the `cat-file --batch-check'
process is waiting on high-latency storage.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 23
1 file changed, 19 insertions, 4 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 4d2e0da9..39dc1f87 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -563,6 +563,16 @@ sub too_big ($$) {
         1;
 }
 
+sub ck_size { # check_async cb for -index --max-size=...
+        my ($oid, $type, $size, $arg, $git) = @_; # $arg supplies index_max_size and is passed on to index_both
+        (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}"; # an undef $type compares as '' and dies here too
+        if ($size <= $arg->{index_max_size}) { # at or under the limit: queue a cat_async for index_both
+                $git->cat_async($oid, \&index_both, $arg);
+        } else { # over the limit: no cat_async is queued, only a warning is emitted
+                warn "W: skipping $oid ($size > $arg->{index_max_size})\n";
+        }
+}
+
 # only for v1
 sub process_stack {
         my ($self, $stk, $sync, $batch_cb) = @_;
@@ -580,13 +590,17 @@ sub process_stack {
                         $git->cat_async($oid, \&unindex_both, $self);
                 }
         }
+        $sync->{index_max_size} = $self->{ibx}->{index_max_size};
         while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
                 if ($f eq 'm') {
-                        $sync->{autime} = $at;
-                        $sync->{cotime} = $ct;
-                        next if too_big($self, $oid);
-                        $git->cat_async($oid, \&index_both, { %$sync });
+                        my $arg = { %$sync, autime => $at, cotime => $ct };
+                        if ($sync->{index_max_size}) {
+                                $git->check_async($oid, \&ck_size, $arg);
+                        } else {
+                                $git->cat_async($oid, \&index_both, $arg);
+                        }
                         if ($max <= 0) {
+                                $git->check_async_wait;
                                 $git->cat_async_wait;
                                 $max = $BATCH_BYTES;
                                 $batch_cb->($nr);
@@ -595,6 +609,7 @@ sub process_stack {
                         $git->cat_async($oid, \&unindex_both, $self);
                 }
         }
+        $git->check_async_wait;
         $git->cat_async_wait;
         $batch_cb->($nr, $stk);
 }