about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong <e@yhbt.net> 2020-07-24 05:56:02 +0000
committer: Eric Wong <e@yhbt.net> 2020-07-25 20:48:18 +0000
commit: 0e68dbad3dc5e3fbc44e8ba8be576b81455d3359 (patch)
tree: dfe57fccc97fdf43ce68f3ae6f5cc804a00520df /lib
parent: de8e1586d732ae6c09a92588a8e4d442aedbff37 (diff)
download: public-inbox-0e68dbad3dc5e3fbc44e8ba8be576b81455d3359.tar.gz
This allows us to speed up indexing operations to SQLite
and Xapian.

Unfortunately, it doesn't affect operations using
`xapian-compact' and the compactor API, since those don't seem
to support Xapian::DB_NO_SYNC, yet.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Msgmap.pm     | 21
-rw-r--r-- lib/PublicInbox/Over.pm       |  1
-rw-r--r-- lib/PublicInbox/OverIdx.pm    |  2
-rw-r--r-- lib/PublicInbox/SearchIdx.pm  |  9
-rw-r--r-- lib/PublicInbox/V2Writable.pm |  6
-rw-r--r-- lib/PublicInbox/Xapcmd.pm     |  5
6 files changed, 29 insertions, 15 deletions
diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm
index 9d2ef0dc..839ddf7c 100644
--- a/lib/PublicInbox/Msgmap.pm
+++ b/lib/PublicInbox/Msgmap.pm
@@ -32,12 +32,11 @@ sub new_file {
         my $self = bless { filename => $f }, $class;
         my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw);
         if ($rw) {
-                create_tables($dbh);
-
                 # TRUNCATE reduces I/O compared to the default (DELETE)
                 $dbh->do('PRAGMA journal_mode = TRUNCATE');
 
                 $dbh->begin_work;
+                create_tables($dbh);
                 $self->created_at(time) unless $self->created_at;
 
                 my $max = $self->max // 0;
@@ -51,12 +50,17 @@ sub new_file {
 sub tmp_clone {
         my ($self) = @_;
         my ($fh, $fn) = tempfile('msgmap-XXXXXXXX', EXLOCK => 0, TMPDIR => 1);
-        $self->{dbh}->sqlite_backup_to_file($fn);
-        my $tmp = ref($self)->new_file($fn, 1);
-        $tmp->{dbh}->do('PRAGMA synchronous = OFF');
-        $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+        my $tmp;
+        if ($self->{dbh}->can('sqlite_backup_to_dbh')) {
+                $tmp = ref($self)->new_file($fn, 2);
+                $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+                $self->{dbh}->sqlite_backup_to_dbh($tmp->{dbh});
+        } else { # DBD::SQLite <= 1.61_01
+                $self->{dbh}->sqlite_backup_to_file($fn);
+                $tmp = ref($self)->new_file($fn, 2);
+                $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
+        }
         $tmp->{pid} = $$;
-        close $fh or die "failed to close $fn: $!";
         $tmp;
 }
 
@@ -241,8 +245,7 @@ sub atfork_parent {
         $self->{pid} or die 'BUG: not a temporary clone';
         $self->{dbh} and die 'BUG: tmp_clone dbh not prepared for parent';
         defined($self->{filename}) or die 'BUG: {filename} not defined';
-        my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, 1);
-        $dbh->do('PRAGMA synchronous = OFF');
+        $self->{dbh} = PublicInbox::Over::dbh_new($self, 2);
 }
 
 sub atfork_prepare {
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index e3f26456..f32743c0 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -40,6 +40,7 @@ sub dbh_new {
                 $st = pack('dd', $st[0], $st[1]);
         } while ($st ne $self->{st} && $tries++ < 3);
         warn "W: $f: .st_dev, .st_ino unstable\n" if $st ne $self->{st};
+        $dbh->do('PRAGMA synchronous = OFF') if ($rw // 0) > 1;
         $dbh;
 }
 
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index c57be724..fcb45079 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -21,7 +21,7 @@ use Carp qw(croak);
 
 sub dbh_new {
         my ($self) = @_;
-        my $dbh = $self->SUPER::dbh_new(1);
+        my $dbh = $self->SUPER::dbh_new($self->{-no_sync} ? 2 : 1);
 
         # TRUNCATE reduces I/O compared to the default (DELETE)
         # We do not use WAL since we're optimized for read-only ops,
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c57a7e16..76425743 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -23,6 +23,7 @@ use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
 my $X = \%PublicInbox::Search::X;
 my ($DB_CREATE_OR_OPEN, $DB_OPEN);
+our $DB_NO_SYNC = 0;
 our $BATCH_BYTES = defined($ENV{XAPIAN_FLUSH_THRESHOLD}) ?
                         0x7fffffff : 1_000_000;
 use constant DEBUG => !!$ENV{DEBUG};
@@ -67,6 +68,7 @@ sub new {
                 $self->{lock_path} = "$inboxdir/ssoma.lock";
                 my $dir = $self->xdir;
                 $self->{over} = PublicInbox::OverIdx->new("$dir/over.sqlite3");
+                $self->{over}->{-no_sync} = 1 if $ibx->{-no_sync};
                 $self->{index_max_size} = $ibx->{index_max_size};
         } elsif ($version == 2) {
                 defined $shard or die "shard is required for v2\n";
@@ -103,6 +105,9 @@ sub load_xapian_writable () {
         *sortable_serialise = $xap.'::sortable_serialise';
         $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
         $DB_OPEN = eval($xap.'::DB_OPEN()');
+        my $ver = (eval($xap.'::major_version()') << 16) |
+                (eval($xap.'::minor_version()') << 8);
+        $DB_NO_SYNC = 0x4 if $ver >= 0x10400;
         1;
 }
 
@@ -126,6 +131,7 @@ sub idx_acquire {
                 }
         }
         return unless defined $flag;
+        $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_sync};
         my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) };
         if ($@) {
                 die "Failed opening $dir: ", $@;
@@ -377,7 +383,8 @@ sub _msgmap_init ($) {
         die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1;
         $self->{mm} //= eval {
                 require PublicInbox::Msgmap;
-                PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, 1);
+                my $rw = $self->{ibx}->{-no_sync} ? 2 : 1;
+                PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw);
         };
 }
 
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 13c1ad6f..3dc20095 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -116,12 +116,13 @@ sub new {
                 total_bytes => 0,
                 current_info => '',
                 xpfx => $xpfx,
-                over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3", 1),
+                over => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"),
                 lock_path => "$dir/inbox.lock",
                 # limit each git repo (epoch) to 1GB or so
                 rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR),
                 last_commit => [], # git epoch -> commit
         };
+        $self->{over}->{-no_sync} = 1 if $v2ibx->{-no_sync};
         $self->{shards} = count_shards($self) || nproc_shards($creat);
         $self->{index_max_size} = $v2ibx->{index_max_size};
         bless $self, $class;
@@ -293,7 +294,8 @@ sub _idx_init { # with_umask callback
         # Now that all subprocesses are up, we can open the FDs
         # for SQLite:
         my $mm = $self->{mm} = PublicInbox::Msgmap->new_file(
-                "$self->{ibx}->{inboxdir}/msgmap.sqlite3", 1);
+                                "$self->{ibx}->{inboxdir}/msgmap.sqlite3",
+                                $self->{ibx}->{-no_sync} ? 2 : 1);
         $mm->{dbh}->begin_work;
 }
 
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 4ee3fc79..d6c069d7 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -412,10 +412,11 @@ sub cpdb ($$) {
 
         # like copydatabase(1), be sure we don't overwrite anything in case
         # of other bugs:
-        my $creat = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
+        my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()');
         die if $@;
         my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase};
-        my $dst = $XapianWritableDatabase->new($tmp, $creat);
+        $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{sync};
+        my $dst = $XapianWritableDatabase->new($tmp, $flag);
         my $pr = $opt->{-progress};
         my $pfx = $opt->{-progress_pfx} = progress_pfx($new);
         my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr;