From 0e68dbad3dc5e3fbc44e8ba8be576b81455d3359 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 24 Jul 2020 05:56:02 +0000 Subject: index+xcpdb: support --no-sync flag This allows us to speed up indexing operations to SQLite and Xapian. Unfortunately, it doesn't affect operations using `xapian-compact' and the compactor API, since that doesn't seem to support Xapian::DB_NO_SYNC, yet. --- Documentation/public-inbox-index.pod | 7 +++++++ Documentation/public-inbox-xcpdb.pod | 6 ++++++ lib/PublicInbox/Msgmap.pm | 21 ++++++++++++--------- lib/PublicInbox/Over.pm | 1 + lib/PublicInbox/OverIdx.pm | 2 +- lib/PublicInbox/SearchIdx.pm | 9 ++++++++- lib/PublicInbox/V2Writable.pm | 6 ++++-- lib/PublicInbox/Xapcmd.pm | 5 +++-- script/public-inbox-index | 5 +++-- script/public-inbox-xcpdb | 4 ++-- 10 files changed, 47 insertions(+), 19 deletions(-) diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 08f2fbf4..aeb1b3a3 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -113,6 +113,13 @@ below. Available in public-inbox 1.6.0 (PENDING). +=item --no-sync + +Disables L<fsync(2)> and L<fdatasync(2)> operations on SQLite +and Xapian. This is only effective with Xapian 1.4+. + +Available in public-inbox 1.6.0 (PENDING). + =back =head1 FILES diff --git a/Documentation/public-inbox-xcpdb.pod b/Documentation/public-inbox-xcpdb.pod index 149c8f78..7fe1e5fe 100644 --- a/Documentation/public-inbox-xcpdb.pod +++ b/Documentation/public-inbox-xcpdb.pod @@ -45,6 +45,12 @@ too many shards given the capabilities of the current hardware. These options are passed directly to L<xapian-compact(1)> when used with C<--compact>. +=item --no-sync + +Disable L<fsync(2)> and L<fdatasync(2)>. + +Available in public-inbox 1.6.0 (PENDING). 
+ =back =head1 ENVIRONMENT diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index 9d2ef0dc..839ddf7c 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -32,12 +32,11 @@ sub new_file { my $self = bless { filename => $f }, $class; my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw); if ($rw) { - create_tables($dbh); - # TRUNCATE reduces I/O compared to the default (DELETE) $dbh->do('PRAGMA journal_mode = TRUNCATE'); $dbh->begin_work; + create_tables($dbh); $self->created_at(time) unless $self->created_at; my $max = $self->max // 0; @@ -51,12 +50,17 @@ sub new_file { sub tmp_clone { my ($self) = @_; my ($fh, $fn) = tempfile('msgmap-XXXXXXXX', EXLOCK => 0, TMPDIR => 1); - $self->{dbh}->sqlite_backup_to_file($fn); - my $tmp = ref($self)->new_file($fn, 1); - $tmp->{dbh}->do('PRAGMA synchronous = OFF'); - $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY'); + my $tmp; + if ($self->{dbh}->can('sqlite_backup_to_dbh')) { + $tmp = ref($self)->new_file($fn, 2); + $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY'); + $self->{dbh}->sqlite_backup_to_dbh($tmp->{dbh}); + } else { # DBD::SQLite <= 1.61_01 + $self->{dbh}->sqlite_backup_to_file($fn); + $tmp = ref($self)->new_file($fn, 2); + $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY'); + } $tmp->{pid} = $$; - close $fh or die "failed to close $fn: $!"; $tmp; } @@ -241,8 +245,7 @@ sub atfork_parent { $self->{pid} or die 'BUG: not a temporary clone'; $self->{dbh} and die 'BUG: tmp_clone dbh not prepared for parent'; defined($self->{filename}) or die 'BUG: {filename} not defined'; - my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, 1); - $dbh->do('PRAGMA synchronous = OFF'); + $self->{dbh} = PublicInbox::Over::dbh_new($self, 2); } sub atfork_prepare { diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index e3f26456..f32743c0 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -40,6 +40,7 @@ sub dbh_new { $st = pack('dd', $st[0], $st[1]); } while ($st 
ne $self->{st} && $tries++ < 3); warn "W: $f: .st_dev, .st_ino unstable\n" if $st ne $self->{st}; + $dbh->do('PRAGMA synchronous = OFF') if ($rw // 0) > 1; $dbh; } diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index c57be724..fcb45079 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -21,7 +21,7 @@ use Carp qw(croak); sub dbh_new { my ($self) = @_; - my $dbh = $self->SUPER::dbh_new(1); + my $dbh = $self->SUPER::dbh_new($self->{-no_sync} ? 2 : 1); # TRUNCATE reduces I/O compared to the default (DELETE) # We do not use WAL since we're optimized for read-only ops, diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index c57a7e16..76425743 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -23,6 +23,7 @@ use PublicInbox::Git qw(git_unquote); use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); my $X = \%PublicInbox::Search::X; my ($DB_CREATE_OR_OPEN, $DB_OPEN); +our $DB_NO_SYNC = 0; our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ? 
0x7fffffff : 1_000_000; use constant DEBUG => !!$ENV{DEBUG}; @@ -67,6 +68,7 @@ sub new { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); + $self->{over}->{-no_sync} = 1 if $ibx->{-no_sync}; $self->{index_max_size} = $ibx->{index_max_size}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; @@ -103,6 +105,9 @@ sub load_xapian_writable () { *sortable_serialise = $xap.'::sortable_serialise'; $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()'); $DB_OPEN = eval($xap.'::DB_OPEN()'); + my $ver = (eval($xap.'::major_version()') << 16) | + (eval($xap.'::minor_version()') << 8); + $DB_NO_SYNC = 0x4 if $ver >= 0x10400; 1; } @@ -126,6 +131,7 @@ sub idx_acquire { } } return unless defined $flag; + $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_sync}; my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) }; if ($@) { die "Failed opening $dir: ", $@; @@ -377,7 +383,8 @@ sub _msgmap_init ($) { die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1; $self->{mm} //= eval { require PublicInbox::Msgmap; - PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, 1); + my $rw = $self->{ibx}->{-no_sync} ? 
2 : 1; + PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw); }; } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 13c1ad6f..3dc20095 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -116,12 +116,13 @@ sub new { total_bytes => 0, current_info => '', xpfx => $xpfx, - over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3", 1), + over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"), lock_path => "$dir/inbox.lock", # limit each git repo (epoch) to 1GB or so rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR), last_commit => [], # git epoch -> commit }; + $self->{over}->{-no_sync} = 1 if $v2ibx->{-no_sync}; $self->{shards} = count_shards($self) || nproc_shards($creat); $self->{index_max_size} = $v2ibx->{index_max_size}; bless $self, $class; @@ -293,7 +294,8 @@ sub _idx_init { # with_umask callback # Now that all subprocesses are up, we can open the FDs # for SQLite: my $mm = $self->{mm} = PublicInbox::Msgmap->new_file( - "$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1); + "$self->{ibx}->{inboxdir}/msgmap.sqlite3", + $self->{ibx}->{-no_sync} ? 
2 : 1); $mm->{dbh}->begin_work; } diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 4ee3fc79..d6c069d7 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -412,10 +412,11 @@ sub cpdb ($$) { # like copydatabase(1), be sure we don't overwrite anything in case # of other bugs: - my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()'); + my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()'); die if $@; my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase}; - my $dst = $XapianWritableDatabase->new($tmp, $creat); + $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{sync}; + my $dst = $XapianWritableDatabase->new($tmp, $flag); my $pr = $opt->{-progress}; my $pfx = $opt->{-progress_pfx} = progress_pfx($new); my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr; diff --git a/script/public-inbox-index b/script/public-inbox-index index 2e1934b0..d5c7cae2 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -14,8 +14,8 @@ PublicInbox::Admin::require_or_die('-index'); use PublicInbox::Xapcmd; my $compact_opt; -my $opt = { quiet => -1, compact => 0, maxsize => undef }; -GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune +my $opt = { quiet => -1, compact => 0, maxsize => undef, sync => 1 }; +GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune sync! 
indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s)) or die "bad command-line args\n$usage"; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; @@ -59,6 +59,7 @@ for my $ibx (@ibxs) { if ($opt->{compact} >= 2) { PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt); } + $ibx->{-no_sync} = 1 if !$opt->{sync}; PublicInbox::Admin::index_inbox($ibx, undef, $opt); PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt) if $compact_opt; } diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index 2b9f032c..fcd96148 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -8,8 +8,8 @@ use PublicInbox::Xapcmd; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); my $usage = "Usage: public-inbox-xcpdb [--compact] INBOX_DIR\n"; -my $opt = {}; -my @opt = (qw(compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT); +my $opt = { sync => 1 }; +my @opt = (qw(sync! compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT); GetOptions($opt, @opt) or die "bad command-line args\n$usage"; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; foreach (@ibxs) { -- cgit v1.2.3-24-ge0c7