From 6a7e3c6f870d0555184b68940eb373fa102d4102 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 10 Aug 2020 02:12:00 +0000 Subject: index: cleanup internal variables Move away from hard-to-read alllowercase naming and favor snake_case or separated-by-dashes. We'll keep `--indexlevel' as-is for now, since it's been around for several releases; but we'll support `--index-level' in the CLI and update our documentation in a few months. We'll also clarify that publicInbox.indexMaxSize is only intended for -index, and not -watch or -mda. --- lib/PublicInbox/SearchIdx.pm | 14 +++++++------- lib/PublicInbox/V2Writable.pm | 26 +++++++++++--------------- 2 files changed, 18 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1cf3e66c..7f2447fe 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -67,7 +67,6 @@ sub new { my $dir = $self->xdir; $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); $self->{over}->{-no_fsync} = 1 if $ibx->{-no_fsync}; - $self->{index_max_size} = $ibx->{index_max_size}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number @@ -553,10 +552,10 @@ sub index_sync { sub check_size { # check_async cb for -index --max-size=... my ($oid, $type, $size, $arg, $git) = @_; (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}"; - if ($size <= $arg->{index_max_size}) { + if ($size <= $arg->{max_size}) { $git->cat_async($oid, $arg->{index_oid}, $arg); } else { - warn "W: skipping $oid ($size > $arg->{index_max_size})\n"; + warn "W: skipping $oid ($size > $arg->{max_size})\n"; } } @@ -573,7 +572,7 @@ sub v1_checkpoint ($$;$) { $self->{mm}->last_commit($newest); } } else { - ${$sync->{max}} = $BATCH_BYTES; + ${$sync->{max}} = $self->{batch_bytes}; } $self->{mm}->{dbh}->commit; @@ -603,7 +602,7 @@ sub v1_checkpoint ($$;$) { sub process_stack { my ($self, $sync, $stk) = @_; my $git = $self->{ibx}->git; - my $max = $BATCH_BYTES; + my $max = $self->{batch_bytes}; my $nr = 0; $sync->{nr} = \$nr; $sync->{max} = \$max; @@ -617,13 +616,13 @@ sub process_stack { $git->cat_async($oid, \&unindex_both, $self); } } - if ($sync->{index_max_size} = $self->{ibx}->{index_max_size}) { + if ($sync->{max_size} = $sync->{-opt}->{max_size}) { $sync->{index_oid} = \&index_both; } while (my ($f, $at, $ct, $oid) = $stk->pop_rec) { if ($f eq 'm') { my $arg = { %$sync, autime => $at, cotime => $ct }; - if ($sync->{index_max_size}) { + if ($sync->{max_size}) { $git->check_async($oid, \&check_size, $arg); } else { $git->cat_async($oid, \&index_both, $arg); @@ -749,6 +748,7 @@ sub _index_sync { my ($self, $opts) = @_; my $tip = $opts->{ref} || 'HEAD'; my $git = $self->{ibx}->git; + $self->{batch_bytes} = $opts->{batch_size} // $BATCH_BYTES; $git->batch_prepare; my $pr = $opts->{-progress}; my $sync = { reindex => $opts->{reindex}, -opt => $opts }; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 28d45d6a..72198a29 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -151,12 +151,6 @@ sub add { $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb); } -sub batch_bytes ($) { - my ($self) = @_; - ($self->{parallel} ? $self->{shards} : 1) * - $PublicInbox::SearchIdx::BATCH_BYTES; -} - # indexes a message, returns true if checkpointing is needed sub do_idx ($$$$) { my ($self, $msgref, $mime, $smsg) = @_; @@ -165,7 +159,7 @@ sub do_idx ($$$$) { my $idx = idx_shard($self, $smsg->{num} % $self->{shards}); $idx->index_raw($msgref, $mime, $smsg); my $n = $self->{transact_bytes} += $smsg->{raw_bytes}; - $n >= batch_bytes($self); + $n >= $self->{batch_bytes}; } sub _add { @@ -286,6 +280,9 @@ sub _idx_init { # with_umask callback # xcpdb can change shard count while -watch is idle my $nshards = count_shards($self); $self->{shards} = $nshards if $nshards && $nshards != $self->{shards}; + $self->{batch_bytes} = $opt->{batch_size} // + $PublicInbox::SearchIdx::BATCH_BYTES; + $self->{batch_bytes} *= $self->{shards} if $self->{parallel}; # need to create all shards before initializing msgmap FD # idx_shards must be visible to all forked processes @@ -890,7 +887,7 @@ sub reindex_checkpoint ($$) { } # allow -watch or -mda to write... - $self->idx_init; # reacquire lock + $self->idx_init($sync->{-opt}); # reacquire lock $mm_tmp->atfork_parent if $mm_tmp; } @@ -1207,12 +1204,11 @@ sub index_xap_step ($$$;$) { $pr->("Xapian indexlevel=$ibx->{indexlevel} ". "$beg..$end (% $step)\n"); } - my $batch_bytes = batch_bytes($self); for (my $num = $beg; $num <= $end; $num += $step) { my $smsg = $ibx->over->get_art($num) or next; $smsg->{v2w} = $self; $ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg); - if ($self->{transact_bytes} >= $batch_bytes) { + if ($self->{transact_bytes} >= $self->{batch_bytes}) { ${$sync->{nr}} = $num; reindex_checkpoint($self, $sync); } @@ -1235,7 +1231,7 @@ sub index_epoch ($$$) { $self->{current_info} = "$i.git $oid"; if ($f eq 'm') { my $arg = { %$sync, autime => $at, cotime => $ct }; - if ($sync->{index_max_size}) { + if ($sync->{max_size}) { $all->check_async($oid, \&check_size, $arg); } else { $all->cat_async($oid, \&index_oid, $arg); @@ -1254,7 +1250,7 @@ sub index_epoch ($$$) { sub xapian_only { my ($self, $opt, $sync, $art_beg) = @_; - my $seq = $opt->{sequentialshard}; + my $seq = $opt->{sequential_shard}; $art_beg //= 0; local $self->{parallel} = 0 if $seq; $self->idx_init($opt); # acquire lock @@ -1284,14 +1280,14 @@ sub xapian_only { sub index_sync { my ($self, $opt) = @_; $opt //= $_[1] //= {}; - goto \&xapian_only if $opt->{xapianonly}; + goto \&xapian_only if $opt->{xapian_only}; my $pr = $opt->{-progress}; my $epoch_max; my $latest = git_dir_latest($self, \$epoch_max); return unless defined $latest; - my $seq = $opt->{sequentialshard}; + my $seq = $opt->{sequential_shard}; my $art_beg; # the NNTP article number we start xapian_only at my $idxlevel = $self->{ibx}->{indexlevel}; local $self->{ibx}->{indexlevel} = 'basic' if $seq; @@ -1323,7 +1319,7 @@ sub index_sync { $art_beg++ if defined($art_beg); } } - if ($sync->{index_max_size} = $self->{ibx}->{index_max_size}) { + if ($sync->{max_size} = $opt->{max_size}) { $sync->{index_oid} = \&index_oid; } # work forwards through history -- cgit v1.2.3-24-ge0c7