about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-08-10 02:12:00 +0000
committer Eric Wong <e@yhbt.net> 2020-08-10 06:26:21 +0000
commit 6a7e3c6f870d0555184b68940eb373fa102d4102 (patch)
tree a0b696867ad31baba92ad4af64b76ae5aec031ea /lib
parent 5fdedf809e7f236c7e50177bff8426a9befbcceb (diff)
download public-inbox-6a7e3c6f870d0555184b68940eb373fa102d4102.tar.gz
Move away from hard-to-read alllowercase naming and favor
snake_case or separated-by-dashes.

We'll keep `--indexlevel' as-is for now, since it's been around
for several releases; but we'll support `--index-level' in the
CLI and update our documentation in a few months.

We'll also clarify that publicInbox.indexMaxSize is only
intended for -index, and not -watch or -mda.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 14
-rw-r--r-- lib/PublicInbox/V2Writable.pm 26
2 files changed, 18 insertions, 22 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1cf3e66c..7f2447fe 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -67,7 +67,6 @@ sub new {
                 my $dir = $self->xdir;
                 $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
                 $self->{over}->{-no_fsync} = 1 if $ibx->{-no_fsync};
-                $self->{index_max_size} = $ibx->{index_max_size};
         } elsif ($version == 2) {
                 defined $shard or die "shard is required for v2\n";
                 # shard is a number
@@ -553,10 +552,10 @@ sub index_sync {
 sub check_size { # check_async cb for -index --max-size=...
         my ($oid, $type, $size, $arg, $git) = @_;
         (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}";
-        if ($size <= $arg->{index_max_size}) {
+        if ($size <= $arg->{max_size}) {
                 $git->cat_async($oid, $arg->{index_oid}, $arg);
         } else {
-                warn "W: skipping $oid ($size > $arg->{index_max_size})\n";
+                warn "W: skipping $oid ($size > $arg->{max_size})\n";
         }
 }
 
@@ -573,7 +572,7 @@ sub v1_checkpoint ($$;$) {
                         $self->{mm}->last_commit($newest);
                 }
         } else {
-                ${$sync->{max}} = $BATCH_BYTES;
+                ${$sync->{max}} = $self->{batch_bytes};
         }
 
         $self->{mm}->{dbh}->commit;
@@ -603,7 +602,7 @@ sub v1_checkpoint ($$;$) {
 sub process_stack {
         my ($self, $sync, $stk) = @_;
         my $git = $self->{ibx}->git;
-        my $max = $BATCH_BYTES;
+        my $max = $self->{batch_bytes};
         my $nr = 0;
         $sync->{nr} = \$nr;
         $sync->{max} = \$max;
@@ -617,13 +616,13 @@ sub process_stack {
                         $git->cat_async($oid, \&unindex_both, $self);
                 }
         }
-        if ($sync->{index_max_size} = $self->{ibx}->{index_max_size}) {
+        if ($sync->{max_size} = $sync->{-opt}->{max_size}) {
                 $sync->{index_oid} = \&index_both;
         }
         while (my ($f, $at, $ct, $oid) = $stk->pop_rec) {
                 if ($f eq 'm') {
                         my $arg = { %$sync, autime => $at, cotime => $ct };
-                        if ($sync->{index_max_size}) {
+                        if ($sync->{max_size}) {
                                 $git->check_async($oid, \&check_size, $arg);
                         } else {
                                 $git->cat_async($oid, \&index_both, $arg);
@@ -749,6 +748,7 @@ sub _index_sync {
         my ($self, $opts) = @_;
         my $tip = $opts->{ref} || 'HEAD';
         my $git = $self->{ibx}->git;
+        $self->{batch_bytes} = $opts->{batch_size} // $BATCH_BYTES;
         $git->batch_prepare;
         my $pr = $opts->{-progress};
         my $sync = { reindex => $opts->{reindex}, -opt => $opts };
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 28d45d6a..72198a29 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -151,12 +151,6 @@ sub add {
         $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
 }
 
-sub batch_bytes ($) {
-        my ($self) = @_;
-        ($self->{parallel} ? $self->{shards} : 1) *
-                $PublicInbox::SearchIdx::BATCH_BYTES;
-}
-
 # indexes a message, returns true if checkpointing is needed
 sub do_idx ($$$$) {
         my ($self, $msgref, $mime, $smsg) = @_;
@@ -165,7 +159,7 @@ sub do_idx ($$$$) {
         my $idx = idx_shard($self, $smsg->{num} % $self->{shards});
         $idx->index_raw($msgref, $mime, $smsg);
         my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
-        $n >= batch_bytes($self);
+        $n >= $self->{batch_bytes};
 }
 
 sub _add {
@@ -286,6 +280,9 @@ sub _idx_init { # with_umask callback
         # xcpdb can change shard count while -watch is idle
         my $nshards = count_shards($self);
         $self->{shards} = $nshards if $nshards && $nshards != $self->{shards};
+        $self->{batch_bytes} = $opt->{batch_size} //
+                                $PublicInbox::SearchIdx::BATCH_BYTES;
+        $self->{batch_bytes} *= $self->{shards} if $self->{parallel};
 
         # need to create all shards before initializing msgmap FD
         # idx_shards must be visible to all forked processes
@@ -890,7 +887,7 @@ sub reindex_checkpoint ($$) {
         }
 
         # allow -watch or -mda to write...
-        $self->idx_init; # reacquire lock
+        $self->idx_init($sync->{-opt}); # reacquire lock
         $mm_tmp->atfork_parent if $mm_tmp;
 }
 
@@ -1207,12 +1204,11 @@ sub index_xap_step ($$$;$) {
                 $pr->("Xapian indexlevel=$ibx->{indexlevel} ".
                         "$beg..$end (% $step)\n");
         }
-        my $batch_bytes = batch_bytes($self);
         for (my $num = $beg; $num <= $end; $num += $step) {
                 my $smsg = $ibx->over->get_art($num) or next;
                 $smsg->{v2w} = $self;
                 $ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg);
-                if ($self->{transact_bytes} >= $batch_bytes) {
+                if ($self->{transact_bytes} >= $self->{batch_bytes}) {
                         ${$sync->{nr}} = $num;
                         reindex_checkpoint($self, $sync);
                 }
@@ -1235,7 +1231,7 @@ sub index_epoch ($$$) {
                 $self->{current_info} = "$i.git $oid";
                 if ($f eq 'm') {
                         my $arg = { %$sync, autime => $at, cotime => $ct };
-                        if ($sync->{index_max_size}) {
+                        if ($sync->{max_size}) {
                                 $all->check_async($oid, \&check_size, $arg);
                         } else {
                                 $all->cat_async($oid, \&index_oid, $arg);
@@ -1254,7 +1250,7 @@ sub index_epoch ($$$) {
 
 sub xapian_only {
         my ($self, $opt, $sync, $art_beg) = @_;
-        my $seq = $opt->{sequentialshard};
+        my $seq = $opt->{sequential_shard};
         $art_beg //= 0;
         local $self->{parallel} = 0 if $seq;
         $self->idx_init($opt); # acquire lock
@@ -1284,14 +1280,14 @@ sub xapian_only {
 sub index_sync {
         my ($self, $opt) = @_;
         $opt //= $_[1] //= {};
-        goto \&xapian_only if $opt->{xapianonly};
+        goto \&xapian_only if $opt->{xapian_only};
 
         my $pr = $opt->{-progress};
         my $epoch_max;
         my $latest = git_dir_latest($self, \$epoch_max);
         return unless defined $latest;
 
-        my $seq = $opt->{sequentialshard};
+        my $seq = $opt->{sequential_shard};
         my $art_beg; # the NNTP article number we start xapian_only at
         my $idxlevel = $self->{ibx}->{indexlevel};
         local $self->{ibx}->{indexlevel} = 'basic' if $seq;
@@ -1323,7 +1319,7 @@ sub index_sync {
                         $art_beg++ if defined($art_beg);
                 }
         }
-        if ($sync->{index_max_size} = $self->{ibx}->{index_max_size}) {
+        if ($sync->{max_size} = $opt->{max_size}) {
                 $sync->{index_oid} = \&index_oid;
         }
         # work forwards through history