From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 16/20] index+xcpdb: support --no-sync flag
Date: Fri, 24 Jul 2020 05:56:02 +0000 [thread overview]
Message-ID: <20200724055606.27332-17-e@yhbt.net> (raw)
In-Reply-To: <20200724055606.27332-1-e@yhbt.net>
This allows us to speed up indexing operations to SQLite
and Xapian by skipping fsync(2) and fdatasync(2) calls.
Unfortunately, it doesn't affect operations using
`xapian-compact' and the compactor API, since neither seems
to support Xapian::DB_NO_SYNC, yet.
---
Documentation/public-inbox-index.pod | 7 +++++++
Documentation/public-inbox-xcpdb.pod | 6 ++++++
lib/PublicInbox/Msgmap.pm | 21 ++++++++++++---------
lib/PublicInbox/Over.pm | 1 +
lib/PublicInbox/OverIdx.pm | 2 +-
lib/PublicInbox/SearchIdx.pm | 9 ++++++++-
lib/PublicInbox/V2Writable.pm | 6 ++++--
lib/PublicInbox/Xapcmd.pm | 5 +++--
script/public-inbox-index | 5 +++--
script/public-inbox-xcpdb | 4 ++--
10 files changed, 47 insertions(+), 19 deletions(-)
diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod
index 08f2fbf45..aeb1b3a39 100644
--- a/Documentation/public-inbox-index.pod
+++ b/Documentation/public-inbox-index.pod
@@ -113,6 +113,13 @@ below.
Available in public-inbox 1.6.0 (PENDING).
+=item --no-sync
+
+Disables L<fsync(2)> and L<fdatasync(2)> operations on SQLite
+and Xapian. The Xapian part is only effective with Xapian 1.4+.
+
+Available in public-inbox 1.6.0 (PENDING).
+
=back
=head1 FILES
diff --git a/Documentation/public-inbox-xcpdb.pod b/Documentation/public-inbox-xcpdb.pod
index 149c8f78c..7fe1e5fe2 100644
--- a/Documentation/public-inbox-xcpdb.pod
+++ b/Documentation/public-inbox-xcpdb.pod
@@ -45,6 +45,12 @@ too many shards given the capabilities of the current hardware.
These options are passed directly to L<xapian-compact(1)> when
used with C<--compact>.
+=item --no-sync
+
+Disables L<fsync(2)> and L<fdatasync(2)> on Xapian (requires Xapian 1.4+).
+
+Available in public-inbox 1.6.0 (PENDING).
+
=back
=head1 ENVIRONMENT
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 9d2ef0dc5..839ddf7ca 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -32,12 +32,11 @@ sub new_file {
my $self = bless { filename => $f }, $class;
my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw);
if ($rw) {
- create_tables($dbh);
-
# TRUNCATE reduces I/O compared to the default (DELETE)
$dbh->do('PRAGMA journal_mode = TRUNCATE');
$dbh->begin_work;
+ create_tables($dbh);
$self->created_at(time) unless $self->created_at;
my $max = $self->max // 0;
@@ -51,12 +50,17 @@ sub new_file {
sub tmp_clone {
my ($self) = @_;
my ($fh, $fn) = tempfile('msgmap-XXXXXXXX', EXLOCK => 0, TMPDIR => 1);
- $self->{dbh}->sqlite_backup_to_file($fn);
- my $tmp = ref($self)->new_file($fn, 1);
- $tmp->{dbh}->do('PRAGMA synchronous = OFF');
- $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+ my $tmp;
+ if ($self->{dbh}->can('sqlite_backup_to_dbh')) {
+ $tmp = ref($self)->new_file($fn, 2);
+ $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+ $self->{dbh}->sqlite_backup_to_dbh($tmp->{dbh});
+ } else { # DBD::SQLite <= 1.61_01
+ $self->{dbh}->sqlite_backup_to_file($fn);
+ $tmp = ref($self)->new_file($fn, 2);
+ $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+ }
$tmp->{pid} = $$;
- close $fh or die "failed to close $fn: $!";
$tmp;
}
@@ -241,8 +245,7 @@ sub atfork_parent {
$self->{pid} or die 'BUG: not a temporary clone';
$self->{dbh} and die 'BUG: tmp_clone dbh not prepared for parent';
defined($self->{filename}) or die 'BUG: {filename} not defined';
- my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, 1);
- $dbh->do('PRAGMA synchronous = OFF');
+ $self->{dbh} = PublicInbox::Over::dbh_new($self, 2);
}
sub atfork_prepare {
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index e3f264564..f32743c05 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -40,6 +40,7 @@ sub dbh_new {
$st = pack('dd', $st[0], $st[1]);
} while ($st ne $self->{st} && $tries++ < 3);
warn "W: $f: .st_dev, .st_ino unstable\n" if $st ne $self->{st};
+ $dbh->do('PRAGMA synchronous = OFF') if ($rw // 0) > 1;
$dbh;
}
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index c57be7243..fcb450794 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -21,7 +21,7 @@ use Carp qw(croak);
sub dbh_new {
my ($self) = @_;
- my $dbh = $self->SUPER::dbh_new(1);
+ my $dbh = $self->SUPER::dbh_new($self->{-no_sync} ? 2 : 1);
# TRUNCATE reduces I/O compared to the default (DELETE)
# We do not use WAL since we're optimized for read-only ops,
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c57a7e164..764257432 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -23,6 +23,7 @@ use PublicInbox::Git qw(git_unquote);
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
my $X = \%PublicInbox::Search::X;
my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our $DB_NO_SYNC = 0;
our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
0x7fffffff : 1_000_000;
use constant DEBUG => !!$ENV{DEBUG};
@@ -67,6 +68,7 @@ sub new {
$self->{lock_path} = "$inboxdir/ssoma.lock";
my $dir = $self->xdir;
$self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+ $self->{over}->{-no_sync} = 1 if $ibx->{-no_sync};
$self->{index_max_size} = $ibx->{index_max_size};
} elsif ($version == 2) {
defined $shard or die "shard is required for v2\n";
@@ -103,6 +105,9 @@ sub load_xapian_writable () {
*sortable_serialise = $xap.'::sortable_serialise';
$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
$DB_OPEN = eval($xap.'::DB_OPEN()');
+ my $ver = (eval($xap.'::major_version()') << 16) |
+ (eval($xap.'::minor_version()') << 8);
+ $DB_NO_SYNC = 0x4 if $ver >= 0x10400;
1;
}
@@ -126,6 +131,7 @@ sub idx_acquire {
}
}
return unless defined $flag;
+ $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_sync};
my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
if ($@) {
die "Failed opening $dir: ", $@;
@@ -377,7 +383,8 @@ sub _msgmap_init ($) {
die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
$self->{mm} //= eval {
require PublicInbox::Msgmap;
- PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, 1);
+ my $rw = $self->{ibx}->{-no_sync} ? 2 : 1;
+ PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw);
};
}
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 13c1ad6f8..3dc200956 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -116,12 +116,13 @@ sub new {
total_bytes => 0,
current_info => '',
xpfx => $xpfx,
- over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3", 1),
+ over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"),
lock_path => "$dir/inbox.lock",
# limit each git repo (epoch) to 1GB or so
rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR),
last_commit => [], # git epoch -> commit
};
+ $self->{over}->{-no_sync} = 1 if $v2ibx->{-no_sync};
$self->{shards} = count_shards($self) || nproc_shards($creat);
$self->{index_max_size} = $v2ibx->{index_max_size};
bless $self, $class;
@@ -293,7 +294,8 @@ sub _idx_init { # with_umask callback
# Now that all subprocesses are up, we can open the FDs
# for SQLite:
my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
- "$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1);
+ "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
+ $self->{ibx}->{-no_sync} ? 2 : 1);
$mm->{dbh}->begin_work;
}
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 4ee3fc791..d6c069d75 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -412,10 +412,11 @@ sub cpdb ($$) {
# like copydatabase(1), be sure we don't overwrite anything in case
# of other bugs:
- my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+ my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
die if $@;
my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
- my $dst = $XapianWritableDatabase->new($tmp, $creat);
+ $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{sync};
+ my $dst = $XapianWritableDatabase->new($tmp, $flag);
my $pr = $opt->{-progress};
my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 2e1934b08..d5c7cae2b 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -14,8 +14,8 @@ PublicInbox::Admin::require_or_die('-index');
use PublicInbox::Xapcmd;
my $compact_opt;
-my $opt = { quiet => -1, compact => 0, maxsize => undef };
-GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
+my $opt = { quiet => -1, compact => 0, maxsize => undef, sync => 1 };
+GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune sync!
indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s))
or die "bad command-line args\n$usage";
die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
@@ -59,6 +59,7 @@ for my $ibx (@ibxs) {
if ($opt->{compact} >= 2) {
PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt);
}
+ $ibx->{-no_sync} = 1 if !$opt->{sync};
PublicInbox::Admin::index_inbox($ibx, undef, $opt);
PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt) if $compact_opt;
}
diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb
index 2b9f032c5..fcd961488 100755
--- a/script/public-inbox-xcpdb
+++ b/script/public-inbox-xcpdb
@@ -8,8 +8,8 @@ use PublicInbox::Xapcmd;
use PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-search');
my $usage = "Usage: public-inbox-xcpdb [--compact] INBOX_DIR\n";
-my $opt = {};
-my @opt = (qw(compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT);
+my $opt = { sync => 1 };
+my @opt = (qw(sync! compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT);
GetOptions($opt, @opt) or die "bad command-line args\n$usage";
my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage;
foreach (@ibxs) {
next prev parent reply other threads:[~2020-07-24 5:56 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-24 5:55 [PATCH 00/20] indexing changes and new features Eric Wong
2020-07-24 5:55 ` [PATCH 01/20] index: support --rethread switch to fix old indices Eric Wong
2020-07-24 5:55 ` [PATCH 02/20] v2: index forwards (via `git log --reverse') Eric Wong
2020-07-24 5:55 ` [PATCH 03/20] v2writable: introduce idx_stack Eric Wong
2020-07-24 5:55 ` [PATCH 04/20] v2writable: index_sync: reduce fill_alternates calls Eric Wong
2020-07-24 5:55 ` [PATCH 05/20] v2writable: move {autime} and {cotime} into $sync state Eric Wong
2020-07-24 5:55 ` [PATCH 06/20] v2writable: allow >= 40 byte git object IDs Eric Wong
2020-07-24 5:55 ` [PATCH 07/20] v2writable: drop "EPOCH.git indexing $RANGE" progress Eric Wong
2020-07-24 5:55 ` [PATCH 08/20] use consistent {ibx} field for writable code paths Eric Wong
2020-07-24 5:55 ` [PATCH 09/20] search: avoid copying {inboxdir} Eric Wong
2020-07-24 5:55 ` [PATCH 10/20] v2writable: use read-only PublicInbox::Git for cat_file Eric Wong
2020-07-24 5:55 ` [PATCH 11/20] v2writable: get rid of {reindex_pipe} field Eric Wong
2020-07-24 5:55 ` [PATCH 12/20] v2writable: clarify "epoch" comment Eric Wong
2020-07-24 5:55 ` [PATCH 13/20] xapcmd: set {from} properly for v1 inboxes Eric Wong
2020-07-24 5:56 ` [PATCH 14/20] searchidx: rename _xdb_{acquire,release} => idx_ Eric Wong
2020-07-24 5:56 ` [PATCH 15/20] searchidx: make v1 indexing closer to v2 Eric Wong
2020-07-24 5:56 ` Eric Wong [this message]
2020-07-24 5:56 ` [PATCH 17/20] v2writable: share log2stack code with v1 Eric Wong
2020-07-24 5:56 ` [PATCH 18/20] searchidx: support async git check Eric Wong
2020-07-24 5:56 ` [PATCH 19/20] searchidx: $batch_cb => v1_checkpoint Eric Wong
2020-07-24 5:56 ` [PATCH 20/20] v2writable: {unindexed} belongs in $sync state Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200724055606.27332-17-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).