user/dev discussion of public-inbox itself
 help / color / Atom feed
* [PATCH] searchidx: disable CoW for SQLite and Xapian under btrfs
@ 2020-07-28 22:21 Eric Wong
  2020-07-29 11:20 ` [PATCH 2/1] xapcmd: -xcpdb and -compact disable CoW, too Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2020-07-28 22:21 UTC (permalink / raw)
  To: meta

SQLite and Xapian files are written to at random offsets, so they
become fragmented under btrfs with copy-on-write.  This leads to
noticeable performance problems (and probably ENOSPC) as these
files get big.

lore/git (v2, <1GB) indexes around 20% faster with this on an
ancient SSD.  lore/lkml seems to be taking forever and I'll
probably cancel it to save wear on my SSD.

Unfortunately, disabling CoW also means disabling checksumming
(and compression), so we'll be careful to only set the No_COW
attribute on regeneratable data.  We want to keep CoW (and
checksums+compression) on git storage because current ref
storage is neither checksummed nor compressed, and git streams
pack output.
---
 MANIFEST                     |  2 ++
 lib/PublicInbox/NDC_PP.pm    | 29 +++++++++++++++++
 lib/PublicInbox/Over.pm      |  5 +++
 lib/PublicInbox/SearchIdx.pm |  5 ++-
 lib/PublicInbox/Spawn.pm     | 63 ++++++++++++++++++++++++++++++++++--
 t/nodatacow.t                | 34 +++++++++++++++++++
 6 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 lib/PublicInbox/NDC_PP.pm
 create mode 100644 t/nodatacow.t

diff --git a/MANIFEST b/MANIFEST
index f46a0776..d312e305 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -156,6 +156,7 @@ lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MsgIter.pm
 lib/PublicInbox/MsgTime.pm
 lib/PublicInbox/Msgmap.pm
+lib/PublicInbox/NDC_PP.pm
 lib/PublicInbox/NNTP.pm
 lib/PublicInbox/NNTPD.pm
 lib/PublicInbox/NNTPdeflate.pm
@@ -309,6 +310,7 @@ t/multi-mid.t
 t/nntp.t
 t/nntpd-tls.t
 t/nntpd.t
+t/nodatacow.t
 t/nulsubject.t
 t/over.t
 t/plack-2-txt-bodies.eml
diff --git a/lib/PublicInbox/NDC_PP.pm b/lib/PublicInbox/NDC_PP.pm
new file mode 100644
index 00000000..0d20030d
--- /dev/null
+++ b/lib/PublicInbox/NDC_PP.pm
@@ -0,0 +1,29 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Pure-perl class for Linux non-Inline::C users to disable COW for btrfs
+package PublicInbox::NDC_PP;
+use strict;
+use v5.10.1;
+
+sub set_nodatacow ($) {
+	my ($fd) = @_;
+	return if $^O ne 'linux';
+	defined(my $path = readlink("/proc/self/fd/$fd")) or return;
+	open my $mh, '<', '/proc/self/mounts' or return;
+	for (grep(/ btrfs /, <$mh>)) {
+		my (undef, $mnt_path, $type) = split(/ /);
+		next if $type ne 'btrfs'; # in case of false-positive from grep
+
+		# weird chars are escaped as octal
+		$mnt_path =~ s/\\(0[0-9]{2})/chr(oct($1))/egs;
+		$mnt_path .= '/' unless $mnt_path =~ m!/\z!;
+		if (index($path, $mnt_path) == 0) {
+			# error goes to stderr, but non-fatal for us
+			system('chattr', '+C', $path);
+			last;
+		}
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index f32743c0..0146414c 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -18,7 +18,12 @@ sub dbh_new {
 	my $f = delete $self->{filename};
 	if (!-f $f) { # SQLite defaults mode to 0644, we want 0666
 		if ($rw) {
+			require PublicInbox::Spawn;
 			open my $fh, '+>>', $f or die "failed to open $f: $!";
+			PublicInbox::Spawn::set_nodatacow(fileno($fh));
+			my $j = "$f-journal";
+			open $fh, '+>>', $j or die "failed to open $j: $!";
+			PublicInbox::Spawn::set_nodatacow(fileno($fh));
 		} else {
 			$self->{filename} = $f; # die on stat() below:
 		}
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 1fc57410..aa8d8ce3 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -125,8 +125,11 @@ sub idx_acquire {
 
 		# don't create empty Xapian directories if we don't need Xapian
 		my $is_shard = defined($self->{shard});
-		if (!$is_shard || ($is_shard && need_xapian($self))) {
+		if (!-d $dir && (!$is_shard ||
+				($is_shard && need_xapian($self)))) {
 			File::Path::mkpath($dir);
+			opendir my $dh, $dir or die "opendir($dir): $!\n";
+			PublicInbox::Spawn::set_nodatacow(fileno($dh));
 		}
 	}
 	return unless defined $flag;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index db679b77..50f31851 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -10,6 +10,9 @@
 # daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
 # scripts (-mda, -index, -learn, -init) either use IPC::run or standard
 # Perl routines.
+#
+# There'll probably be more OS-level C stuff here, down the line.
+# We don't want too many DSOs: https://udrepper.livejournal.com/8790.html
 
 package PublicInbox::Spawn;
 use strict;
@@ -25,6 +28,7 @@ my $vfork_spawn = <<'VFORK_SPAWN';
 #include <sys/resource.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <errno.h>
 
 /* some platforms need alloca.h, but some don't */
 #if defined(__GNUC__) && !defined(alloca)
@@ -144,12 +148,51 @@ int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref,
 }
 VFORK_SPAWN
 
+# btrfs on Linux is copy-on-write (COW) by default.  As of Linux 5.7,
+# this still leads to fragmentation for SQLite and Xapian files where
+# random I/O happens, so we disable COW just for SQLite files and Xapian
+# directories.  Disabling COW disables checksumming, so we only do this
+# for regeneratable files, and not canonical git storage (git doesn't
+# checksum refs, only data under $GIT_DIR/objects).
+my $set_nodatacow = $^O eq 'linux' ? <<'SET_NODATACOW' : '';
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <linux/fs.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+void set_nodatacow(int fd)
+{
+	struct statfs buf;
+	int val = 0;
+
+	if (fstatfs(fd, &buf) < 0) {
+		fprintf(stderr, "fstatfs: %s\\n", strerror(errno));
+		return;
+	}
+
+	/* only btrfs is known to have this problem, so skip for non-btrfs */
+	if (buf.f_type != BTRFS_SUPER_MAGIC)
+		return;
+
+	if (ioctl(fd, FS_IOC_GETFLAGS, &val) < 0) {
+		fprintf(stderr, "FS_IOC_GET_FLAGS: %s\\n", strerror(errno));
+		return;
+	}
+	val |= FS_NOCOW_FL;
+	if (ioctl(fd, FS_IOC_SETFLAGS, &val) < 0)
+		fprintf(stderr, "FS_IOC_SET_FLAGS: %s\\n", strerror(errno));
+}
+SET_NODATACOW
+
 my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
 		$ENV{XDG_CACHE_HOME} //
 		( ($ENV{HOME} // '/nonexistent').'/.cache' )
 	).'/public-inbox/inline-c';
 
-$vfork_spawn = undef unless -d $inline_dir && -w _;
+$set_nodatacow = $vfork_spawn = undef unless -d $inline_dir && -w _;
 if (defined $vfork_spawn) {
 	# Inline 0.64 or later has locking in multi-process env,
 	# but we support 0.5 on Debian wheezy
@@ -158,14 +201,21 @@ if (defined $vfork_spawn) {
 		my $f = "$inline_dir/.public-inbox.lock";
 		open my $fh, '>', $f or die "failed to open $f: $!\n";
 		flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
-		eval 'use Inline C => $vfork_spawn';
+		eval 'use Inline C => $vfork_spawn . $set_nodatacow';
 		my $err = $@;
+		my $ndc_err;
+		if ($err && $set_nodatacow) { # missing Linux kernel headers
+			$ndc_err = $err;
+			undef $set_nodatacow;
+			eval 'use Inline C => $vfork_spawn';
+		}
 		flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
 		die $err if $err;
+		warn $ndc_err if $ndc_err;
 	};
 	if ($@) {
 		warn "Inline::C failed for vfork: $@\n";
-		$vfork_spawn = undef;
+		$set_nodatacow = $vfork_spawn = undef;
 	}
 }
 
@@ -173,6 +223,13 @@ unless (defined $vfork_spawn) {
 	require PublicInbox::SpawnPP;
 	*pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec
 }
+unless ($set_nodatacow) {
+	require PublicInbox::NDC_PP;
+	no warnings 'once';
+	*set_nodatacow = \&PublicInbox::NDC_PP::set_nodatacow;
+}
+undef $set_nodatacow;
+undef $vfork_spawn;
 
 sub which ($) {
 	my ($file) = @_;
diff --git a/t/nodatacow.t b/t/nodatacow.t
new file mode 100644
index 00000000..87b6bdf7
--- /dev/null
+++ b/t/nodatacow.t
@@ -0,0 +1,34 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use File::Temp qw(tempfile);
+use PublicInbox::TestCommon;
+use PublicInbox::Spawn qw(which);
+use_ok 'PublicInbox::NDC_PP';
+
+SKIP: {
+	my $nr = 2;
+	skip 'test is Linux-only', $nr if $^O ne 'linux';
+	my $dir = $ENV{BTRFS_TESTDIR};
+	skip 'BTRFS_TESTDIR not defined', $nr unless defined $dir;
+	skip 'chattr(1) not installed', $nr unless which('chattr');
+	my $lsattr = which('lsattr') or skip 'lsattr(1) not installed', $nr;
+	my ($fh, $name) = tempfile(DIR => $dir, UNLINK => 1);
+	BAIL_OUT "tempfile: $!" unless $fh && defined($name);
+	my $pp_sub = \&PublicInbox::NDC_PP::set_nodatacow;
+	$pp_sub->(fileno($fh));
+	my $res = xqx([$lsattr, $name]);
+	like($res, qr/C/, "`C' attribute set with pure Perl");
+
+	my $ic_sub = \&PublicInbox::Spawn::set_nodatacow;
+	$pp_sub == $ic_sub and
+		skip 'Inline::C or Linux kernel headers missing', 1;
+	($fh, $name) = tempfile(DIR => $dir, UNLINK => 1);
+	$ic_sub->(fileno($fh));
+	$res = xqx([$lsattr, $name]);
+	like($res, qr/C/, "`C' attribute set with Inline::C");
+};
+
+done_testing;

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH 2/1] xapcmd: -xcpdb and -compact disable CoW, too
  2020-07-28 22:21 [PATCH] searchidx: disable CoW for SQLite and Xapian under btrfs Eric Wong
@ 2020-07-29 11:20 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2020-07-29 11:20 UTC (permalink / raw)
  To: meta

This gives an opportunity for users already suffering from CoW
fragmentation to at least get the Xapian DBs off CoW.  Aside
from over.sqlite3 in v1, the SQLite DBs remain untouched, though
VACUUM support may come in the future.
---
 lib/PublicInbox/SearchIdx.pm | 11 ++++++++---
 lib/PublicInbox/Xapcmd.pm    |  5 ++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index aa8d8ce3..080aca7c 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -21,7 +21,7 @@ use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
-our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size);
+our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size nodatacow_dir);
 my $X = \%PublicInbox::Search::X;
 my ($DB_CREATE_OR_OPEN, $DB_OPEN);
 our $DB_NO_SYNC = 0;
@@ -110,6 +110,12 @@ sub load_xapian_writable () {
 	1;
 }
 
+sub nodatacow_dir ($) {
+	my ($dir) = @_;
+	opendir my $dh, $dir or die "opendir($dir): $!\n";
+	PublicInbox::Spawn::set_nodatacow(fileno($dh));
+}
+
 sub idx_acquire {
 	my ($self) = @_;
 	my $flag;
@@ -128,8 +134,7 @@ sub idx_acquire {
 		if (!-d $dir && (!$is_shard ||
 				($is_shard && need_xapian($self)))) {
 			File::Path::mkpath($dir);
-			opendir my $dh, $dir or die "opendir($dir): $!\n";
-			PublicInbox::Spawn::set_nodatacow(fileno($dh));
+			nodatacow_dir($dir);
 		}
 	}
 	return unless defined $flag;
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 3b7a581b..f1c80831 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PublicInbox::Spawn qw(which popen_rd);
 use PublicInbox::Over;
-use PublicInbox::SearchIdx;
+use PublicInbox::SearchIdx qw(nodatacow_dir);
 use File::Temp 0.19 (); # ->newdir
 use File::Path qw(remove_tree);
 use File::Basename qw(dirname);
@@ -187,6 +187,7 @@ sub prepare_run {
 		my $v = PublicInbox::Search::SCHEMA_VERSION();
 		my $wip = File::Temp->newdir("xapian$v-XXXXXXXX", DIR => $dir);
 		$tmp->{$old} = $wip;
+		nodatacow_dir($wip->dirname);
 		push @queue, [ $old, $wip ];
 	} else {
 		opendir my $dh, $old or die "Failed to opendir $old: $!\n";
@@ -217,6 +218,7 @@ sub prepare_run {
 			same_fs_or_die($old, $wip->dirname);
 			my $cur = "$old/$dn";
 			push @queue, [ $src // $cur , $wip ];
+			nodatacow_dir($wip->dirname);
 			$tmp->{$cur} = $wip;
 		}
 		# mark old shards to be unlinked
@@ -406,6 +408,7 @@ sub cpdb ($$) {
 		$ft = File::Temp->newdir("$new.compact-XXXXXX", DIR => $dir);
 		setup_signals();
 		$tmp = $ft->dirname;
+		nodatacow_dir($tmp);
 	} else {
 		$tmp = $new;
 	}

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-28 22:21 [PATCH] searchidx: disable CoW for SQLite and Xapian under btrfs Eric Wong
2020-07-29 11:20 ` [PATCH 2/1] xapcmd: -xcpdb and -compact disable CoW, too Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git