diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Config.pm | 9 | ||||
-rw-r--r-- | lib/PublicInbox/V2Writable.pm | 55 | ||||
-rw-r--r-- | lib/PublicInbox/WatchMaildir.pm | 2 |
3 files changed, 58 insertions, 8 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 67199bb3..f9184bd2 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -369,8 +369,8 @@ sub _fill_code_repo { $git; } -sub _git_config_bool ($) { - my ($val) = @_; +sub git_bool { + my ($val) = $_[-1]; # $_[0] may be $self, or $val if ($val =~ /\A(?:false|no|off|[\-\+]?(?:0x)?0+)\z/i) { 0; } elsif ($val =~ /\A(?:true|yes|on|[\-\+]?(?:0x)?[0-9]+)\z/i) { @@ -386,7 +386,8 @@ sub _fill { foreach my $k (qw(inboxdir filter newsgroup watch httpbackendmax - replyto feedmax nntpserver indexlevel)) { + replyto feedmax nntpserver + indexlevel indexsequentialshard)) { my $v = $self->{"$pfx.$k"}; $ibx->{$k} = $v if defined $v; } @@ -400,7 +401,7 @@ sub _fill { foreach my $k (qw(obfuscate)) { my $v = $self->{"$pfx.$k"}; defined $v or next; - if (defined(my $bval = _git_config_bool($v))) { + if (defined(my $bval = git_bool($v))) { $ibx->{$k} = $bval; } else { warn "Ignoring $pfx.$k=$v in config, not boolean\n"; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index f98afa61..7bc24592 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -875,7 +875,8 @@ sub reindex_checkpoint ($$) { $self->{ibx}->git->cleanup; # *async_wait ${$sync->{need_checkpoint}} = 0; - $sync->{mm_tmp}->atfork_prepare; + my $mm_tmp = $sync->{mm_tmp}; + $mm_tmp->atfork_prepare if $mm_tmp; $self->done; # release lock if (my $pr = $sync->{-opt}->{-progress}) { @@ -884,7 +885,7 @@ sub reindex_checkpoint ($$) { # allow -watch or -mda to write... $self->idx_init; # reacquire lock - $sync->{mm_tmp}->atfork_parent; + $mm_tmp->atfork_parent if $mm_tmp; } sub index_oid { # cat_async callback @@ -1085,7 +1086,10 @@ sub sync_prepare ($$$) { } $all->cat_async_wait; } - return 0 if (!$regen_max && !keys(%{$self->{unindex_range}})); + if (!$regen_max && !keys(%{$self->{unindex_range}})) { + $sync->{-regen_fmt} = "%u/?\n"; + return 0; + } # reindex should NOT see new commits anymore, if we do, # it's a problem and we need to notice it via die() @@ -1177,6 +1181,36 @@ sub sync_ranges ($$$) { $ranges; } +sub index_xap_only { # git->cat_async callback + my ($bref, $oid, $type, $size, $smsg) = @_; + my $self = $smsg->{v2w}; + my $idx = idx_shard($self, $smsg->{num} % $self->{shards}); + $idx->begin_txn_lazy; + $idx->add_message(PublicInbox::Eml->new($bref), $smsg); + $self->{transact_bytes} += $size; +} + +sub index_seq_shard ($$$) { + my ($self, $sync, $off) = @_; + my $ibx = $self->{ibx}; + my $max = $ibx->mm->max or return; + my $all = $ibx->git; + my $over = $ibx->over; + my $batch_bytes = $PublicInbox::SearchIdx::BATCH_BYTES; + if (my $pr = $sync->{-opt}->{-progress}) { + $pr->("Xapian indexlevel=$ibx->{indexlevel} % $off\n"); + } + for (my $num = $off; $num <= $max; $num += $self->{shards}) { + my $smsg = $over->get_art($num) or next; + $smsg->{v2w} = $self; + $all->cat_async($smsg->{blob}, \&index_xap_only, $smsg); + if ($self->{transact_bytes} >= $batch_bytes) { + ${$sync->{nr}} = $num; + reindex_checkpoint($self, $sync); + } + } +} + sub index_epoch ($$$) { my ($self, $sync, $i) = @_; @@ -1218,6 +1252,11 @@ sub index_sync { my $epoch_max; my $latest = git_dir_latest($self, \$epoch_max); return unless defined $latest; + + my $seq = $opt->{sequentialshard}; + my $idxlevel = $self->{ibx}->{indexlevel}; + local $self->{ibx}->{indexlevel} = 'basic' if $seq; + $self->idx_init($opt); # acquire lock fill_alternates($self, $epoch_max); $self->{over}->rethread_prepare($opt); @@ -1252,6 +1291,16 @@ sub index_sync { $pr->('all.git '.sprintf($sync->{-regen_fmt}, $$nr)) if $pr; } + if ($seq) { # deal with Xapian shards sequentially + my $end = $self->{shards} - 1; + $self->{ibx}->{indexlevel} = $idxlevel; + delete $sync->{mm_tmp}; + $self->idx_init($opt); # re-acquire lock + index_seq_shard($self, $sync, $_) for (0..$end); + $self->{ibx}->git->cat_async_wait; + $self->done; + } + # reindex does not pick up new changes, so we rerun w/o it: if ($opt->{reindex}) { my %again = %$opt; diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 142118bd..2ba10a9e 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -285,7 +285,7 @@ sub cfg_intvl ($$$) { sub cfg_bool ($$$) { my ($cfg, $key, $url) = @_; my $orig = $cfg->urlmatch($key, $url) // return; - my $bool = PublicInbox::Config::_git_config_bool($orig); + my $bool = $cfg->git_bool($orig); warn "W: $key=$orig for $url is not boolean\n" unless defined($bool); $bool; } |