about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@yhbt.net> 2020-07-24 05:56:02 +0000
committer: Eric Wong <e@yhbt.net> 2020-07-25 20:48:18 +0000
commit: 0e68dbad3dc5e3fbc44e8ba8be576b81455d3359 (patch)
tree: dfe57fccc97fdf43ce68f3ae6f5cc804a00520df
parent: de8e1586d732ae6c09a92588a8e4d442aedbff37 (diff)
download: public-inbox-0e68dbad3dc5e3fbc44e8ba8be576b81455d3359.tar.gz
This allows us to speed up indexing operations to SQLite
and Xapian.

Unfortunately, it doesn't affect operations using
`xapian-compact' and the compactor API, since that doesn't seem
to support Xapian::DB_NO_SYNC, yet.
-rw-r--r-- Documentation/public-inbox-index.pod 7
-rw-r--r-- Documentation/public-inbox-xcpdb.pod 6
-rw-r--r-- lib/PublicInbox/Msgmap.pm 21
-rw-r--r-- lib/PublicInbox/Over.pm 1
-rw-r--r-- lib/PublicInbox/OverIdx.pm 2
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 9
-rw-r--r-- lib/PublicInbox/V2Writable.pm 6
-rw-r--r-- lib/PublicInbox/Xapcmd.pm 5
-rwxr-xr-x script/public-inbox-index 5
-rwxr-xr-x script/public-inbox-xcpdb 4
10 files changed, 47 insertions, 19 deletions
diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod
index 08f2fbf4..aeb1b3a3 100644
--- a/Documentation/public-inbox-index.pod
+++ b/Documentation/public-inbox-index.pod
@@ -113,6 +113,13 @@ below.
 
 Available in public-inbox 1.6.0 (PENDING).
 
+=item --no-sync
+
+Disables L<fsync(2)> and L<fdatasync(2)> operations on SQLite
+and Xapian.  This is only effective with Xapian 1.4+.
+
+Available in public-inbox 1.6.0 (PENDING).
+
 =back
 
 =head1 FILES
diff --git a/Documentation/public-inbox-xcpdb.pod b/Documentation/public-inbox-xcpdb.pod
index 149c8f78..7fe1e5fe 100644
--- a/Documentation/public-inbox-xcpdb.pod
+++ b/Documentation/public-inbox-xcpdb.pod
@@ -45,6 +45,12 @@ too many shards given the capabilities of the current hardware.
 These options are passed directly to L<xapian-compact(1)> when
 used with C<--compact>.
 
+=item --no-sync
+
+Disable L<fsync(2)> and L<fdatasync(2)>.
+
+Available in public-inbox 1.6.0 (PENDING).
+
 =back
 
 =head1 ENVIRONMENT
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 9d2ef0dc..839ddf7c 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -32,12 +32,11 @@ sub new_file {
         my $self = bless { filename => $f }, $class;
         my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw);
         if ($rw) {
-                create_tables($dbh);
-
                 # TRUNCATE reduces I/O compared to the default (DELETE)
                 $dbh->do('PRAGMA journal_mode = TRUNCATE');
 
                 $dbh->begin_work;
+                create_tables($dbh);
                 $self->created_at(time) unless $self->created_at;
 
                 my $max = $self->max // 0;
@@ -51,12 +50,17 @@ sub new_file {
 sub tmp_clone {
         my ($self) = @_;
         my ($fh, $fn) = tempfile('msgmap-XXXXXXXX', EXLOCK => 0, TMPDIR => 1);
-        $self->{dbh}->sqlite_backup_to_file($fn);
-        my $tmp = ref($self)->new_file($fn, 1);
-        $tmp->{dbh}->do('PRAGMA synchronous = OFF');
-        $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+        my $tmp;
+        if ($self->{dbh}->can('sqlite_backup_to_dbh')) {
+                $tmp = ref($self)->new_file($fn, 2);
+                $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+                $self->{dbh}->sqlite_backup_to_dbh($tmp->{dbh});
+        } else { # DBD::SQLite <= 1.61_01
+                $self->{dbh}->sqlite_backup_to_file($fn);
+                $tmp = ref($self)->new_file($fn, 2);
+                $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+        }
         $tmp->{pid} = $$;
-        close $fh or die "failed to close $fn: $!";
         $tmp;
 }
 
@@ -241,8 +245,7 @@ sub atfork_parent {
         $self->{pid} or die 'BUG: not a temporary clone';
         $self->{dbh} and die 'BUG: tmp_clone dbh not prepared for parent';
         defined($self->{filename}) or die 'BUG: {filename} not defined';
-        my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, 1);
-        $dbh->do('PRAGMA synchronous = OFF');
+        $self->{dbh} = PublicInbox::Over::dbh_new($self, 2);
 }
 
 sub atfork_prepare {
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index e3f26456..f32743c0 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -40,6 +40,7 @@ sub dbh_new {
                 $st = pack('dd', $st[0], $st[1]);
         } while ($st ne $self->{st} && $tries++ < 3);
         warn "W: $f: .st_dev, .st_ino unstable\n" if $st ne $self->{st};
+        $dbh->do('PRAGMA synchronous = OFF') if ($rw // 0) > 1;
         $dbh;
 }
 
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index c57be724..fcb45079 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -21,7 +21,7 @@ use Carp qw(croak);
 
 sub dbh_new {
         my ($self) = @_;
-        my $dbh = $self->SUPER::dbh_new(1);
+        my $dbh = $self->SUPER::dbh_new($self->{-no_sync} ? 2 : 1);
 
         # TRUNCATE reduces I/O compared to the default (DELETE)
         # We do not use WAL since we're optimized for read-only ops,
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c57a7e16..76425743 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -23,6 +23,7 @@ use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 my $X = \%PublicInbox::Search::X;
 my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our $DB_NO_SYNC = 0;
 our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
                         0x7fffffff : 1_000_000;
 use constant DEBUG => !!$ENV{DEBUG};
@@ -67,6 +68,7 @@ sub new {
                 $self->{lock_path} = "$inboxdir/ssoma.lock";
                 my $dir = $self->xdir;
                 $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+                $self->{over}->{-no_sync} = 1 if $ibx->{-no_sync};
                 $self->{index_max_size} = $ibx->{index_max_size};
         } elsif ($version == 2) {
                 defined $shard or die "shard is required for v2\n";
@@ -103,6 +105,9 @@ sub load_xapian_writable () {
         *sortable_serialise = $xap.'::sortable_serialise';
         $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
         $DB_OPEN = eval($xap.'::DB_OPEN()');
+        my $ver = (eval($xap.'::major_version()') << 16) |
+                (eval($xap.'::minor_version()') << 8);
+        $DB_NO_SYNC = 0x4 if $ver >= 0x10400;
         1;
 }
 
@@ -126,6 +131,7 @@ sub idx_acquire {
                 }
         }
         return unless defined $flag;
+        $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_sync};
         my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
         if ($@) {
                 die "Failed opening $dir: ", $@;
@@ -377,7 +383,8 @@ sub _msgmap_init ($) {
         die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
         $self->{mm} //= eval {
                 require PublicInbox::Msgmap;
-                PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, 1);
+                my $rw = $self->{ibx}->{-no_sync} ? 2 : 1;
+                PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw);
         };
 }
 
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 13c1ad6f..3dc20095 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -116,12 +116,13 @@ sub new {
                 total_bytes => 0,
                 current_info => '',
                 xpfx => $xpfx,
-                over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3", 1),
+                over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"),
                 lock_path => "$dir/inbox.lock",
                 # limit each git repo (epoch) to 1GB or so
                 rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR),
                 last_commit => [], # git epoch -> commit
         };
+        $self->{over}->{-no_sync} = 1 if $v2ibx->{-no_sync};
         $self->{shards} = count_shards($self) || nproc_shards($creat);
         $self->{index_max_size} = $v2ibx->{index_max_size};
         bless $self, $class;
@@ -293,7 +294,8 @@ sub _idx_init { # with_umask callback
         # Now that all subprocesses are up, we can open the FDs
         # for SQLite:
         my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
-                "$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1);
+                                "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
+                                $self->{ibx}->{-no_sync} ? 2 : 1);
         $mm->{dbh}->begin_work;
 }
 
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 4ee3fc79..d6c069d7 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -412,10 +412,11 @@ sub cpdb ($$) {
 
         # like copydatabase(1), be sure we don't overwrite anything in case
         # of other bugs:
-        my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+        my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
         die if $@;
         my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
-        my $dst = $XapianWritableDatabase->new($tmp, $creat);
+        $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{sync};
+        my $dst = $XapianWritableDatabase->new($tmp, $flag);
         my $pr = $opt->{-progress};
         my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
         my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 2e1934b0..d5c7cae2 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -14,8 +14,8 @@ PublicInbox::Admin::require_or_die('-index');
 use PublicInbox::Xapcmd;
 
 my $compact_opt;
-my $opt = { quiet => -1, compact => 0, maxsize => undef };
-GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
+my $opt = { quiet => -1, compact => 0, maxsize => undef, sync => 1 };
+GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune sync!
                 indexlevel|L=s maxsize|max-size=s batchsize|batch-size=s))
         or die "bad command-line args\n$usage";
 die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0;
@@ -59,6 +59,7 @@ for my $ibx (@ibxs) {
         if ($opt->{compact} >= 2) {
                 PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt);
         }
+        $ibx->{-no_sync} = 1 if !$opt->{sync};
         PublicInbox::Admin::index_inbox($ibx, undef, $opt);
         PublicInbox::Xapcmd::run($ibx, 'compact', $compact_opt) if $compact_opt;
 }
diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb
index 2b9f032c..fcd96148 100755
--- a/script/public-inbox-xcpdb
+++ b/script/public-inbox-xcpdb
@@ -8,8 +8,8 @@ use PublicInbox::Xapcmd;
 use PublicInbox::Admin;
 PublicInbox::Admin::require_or_die('-search');
 my $usage = "Usage: public-inbox-xcpdb [--compact] INBOX_DIR\n";
-my $opt = {};
-my @opt = (qw(compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT);
+my $opt = { sync => 1 };
+my @opt = (qw(sync! compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT);
 GetOptions($opt, @opt) or die "bad command-line args\n$usage";
 my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage;
 foreach (@ibxs) {