about summary refs log tree commit homepage
path: root/lib/PublicInbox/Spawn.pm
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-07-28 22:21:58 +0000
committer Eric Wong <e@yhbt.net> 2020-07-29 11:32:57 +0000
commit c106504309621b662ce6c7cd914718f7045edca4 (patch)
tree c569ad56cfd8e192c9f087faf9a5e13482dcd27f /lib/PublicInbox/Spawn.pm
parent a3391407c960e4bbd825a34b87d053de6ef3767a (diff)
download public-inbox-c106504309621b662ce6c7cd914718f7045edca4.tar.gz
SQLite and Xapian files are written randomly, thus they become
fragmented under btrfs with copy-on-write.  This leads to
noticeable performance problems (and probably ENOSPC) as these
files get big.

lore/git (v2, <1GB) indexes around 20% faster with this on an
ancient SSD.  lore/lkml seems to be taking forever and I'll
probably cancel it to save wear on my SSD.

Unfortunately, disabling CoW also means disabling checksumming
(and compression), so we'll be careful to only set the No_COW
attribute on regeneratable data.  We want to keep CoW (and
checksums+compression) on git storage because current ref
storage is neither checksummed nor compressed, and git streams
pack output.
Diffstat (limited to 'lib/PublicInbox/Spawn.pm')
-rw-r--r-- lib/PublicInbox/Spawn.pm 63
1 files changed, 60 insertions, 3 deletions
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index db679b77..50f31851 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -10,6 +10,9 @@
 # daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
 # scripts (-mda, -index, -learn, -init) either use IPC::run or standard
 # Perl routines.
+#
+# There'll probably be more OS-level C stuff here, down the line.
+# We don't want too many DSOs: https://udrepper.livejournal.com/8790.html
 
 package PublicInbox::Spawn;
 use strict;
@@ -25,6 +28,7 @@ my $vfork_spawn = <<'VFORK_SPAWN';
 #include <sys/resource.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <errno.h>
 
 /* some platforms need alloca.h, but some don't */
 #if defined(__GNUC__) && !defined(alloca)
@@ -144,12 +148,51 @@ int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref,
 }
 VFORK_SPAWN
 
+# btrfs on Linux is copy-on-write (COW) by default.  As of Linux 5.7,
+# this still leads to fragmentation for SQLite and Xapian files where
+# random I/O happens, so we disable COW just for SQLite files and Xapian
+# directories.  Disabling COW disables checksumming, so we only do this
+# for regeneratable files, and not canonical git storage (git doesn't
+# checksum refs, only data under $GIT_DIR/objects).
+my $set_nodatacow = $^O eq 'linux' ? <<'SET_NODATACOW' : '';
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <linux/fs.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+void set_nodatacow(int fd)
+{
+        struct statfs buf;
+        int val = 0;
+
+        if (fstatfs(fd, &buf) < 0) {
+                fprintf(stderr, "fstatfs: %s\\n", strerror(errno));
+                return;
+        }
+
+        /* only btrfs is known to have this problem, so skip for non-btrfs */
+        if (buf.f_type != BTRFS_SUPER_MAGIC)
+                return;
+
+        if (ioctl(fd, FS_IOC_GETFLAGS, &val) < 0) {
+                fprintf(stderr, "FS_IOC_GET_FLAGS: %s\\n", strerror(errno));
+                return;
+        }
+        val |= FS_NOCOW_FL;
+        if (ioctl(fd, FS_IOC_SETFLAGS, &val) < 0)
+                fprintf(stderr, "FS_IOC_SET_FLAGS: %s\\n", strerror(errno));
+}
+SET_NODATACOW
+
 my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
                 $ENV{XDG_CACHE_HOME} //
                 ( ($ENV{HOME} // '/nonexistent').'/.cache' )
         ).'/public-inbox/inline-c';
 
-$vfork_spawn = undef unless -d $inline_dir && -w _;
+$set_nodatacow = $vfork_spawn = undef unless -d $inline_dir && -w _;
 if (defined $vfork_spawn) {
         # Inline 0.64 or later has locking in multi-process env,
         # but we support 0.5 on Debian wheezy
@@ -158,14 +201,21 @@ if (defined $vfork_spawn) {
                 my $f = "$inline_dir/.public-inbox.lock";
                 open my $fh, '>', $f or die "failed to open $f: $!\n";
                 flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
-                eval 'use Inline C => $vfork_spawn';
+                eval 'use Inline C => $vfork_spawn . $set_nodatacow';
                 my $err = $@;
+                my $ndc_err;
+                if ($err && $set_nodatacow) { # missing Linux kernel headers
+                        $ndc_err = $err;
+                        undef $set_nodatacow;
+                        eval 'use Inline C => $vfork_spawn';
+                }
                 flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
                 die $err if $err;
+                warn $ndc_err if $ndc_err;
         };
         if ($@) {
                 warn "Inline::C failed for vfork: $@\n";
-                $vfork_spawn = undef;
+                $set_nodatacow = $vfork_spawn = undef;
         }
 }
 
@@ -173,6 +223,13 @@ unless (defined $vfork_spawn) {
         require PublicInbox::SpawnPP;
         *pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec
 }
+unless ($set_nodatacow) {
+        require PublicInbox::NDC_PP;
+        no warnings 'once';
+        *set_nodatacow = \&PublicInbox::NDC_PP::set_nodatacow;
+}
+undef $set_nodatacow;
+undef $vfork_spawn;
 
 sub which ($) {
         my ($file) = @_;