user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 15/15] lei: use RENAME_NOREPLACE on Linux 3.15+
Date: Thu, 21 Oct 2021 21:10:32 +0000	[thread overview]
Message-ID: <20211021211032.22666-16-e@80x24.org> (raw)
In-Reply-To: <20211021211032.22666-1-e@80x24.org>

One syscall is better than two for atomicity in Maildirs.  This
means there's no window where another process can see both the
old and new file at the same time (link && unlink), nor a window
where we might inadvertantly clobber an existing file if we were
to do `stat && rename'.
---
 MANIFEST                       |  1 +
 devel/syscall-list             |  8 +++++-
 lib/PublicInbox/LeiExportKw.pm | 19 +++++--------
 lib/PublicInbox/LeiStore.pm    |  8 +++---
 lib/PublicInbox/LeiToMail.pm   |  7 +++--
 lib/PublicInbox/Syscall.pm     | 49 +++++++++++++++++++++++++++++++---
 t/rename_noreplace.t           | 26 ++++++++++++++++++
 7 files changed, 92 insertions(+), 26 deletions(-)
 create mode 100644 t/rename_noreplace.t

diff --git a/MANIFEST b/MANIFEST
index af1522d71bd1..9fd979ef02fb 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -528,6 +528,7 @@ t/psgi_v2.t
 t/purge.t
 t/qspawn.t
 t/reindex-time-range.t
+t/rename_noreplace.t
 t/replace.t
 t/reply.t
 t/run.perl
diff --git a/devel/syscall-list b/devel/syscall-list
index b33401d98ce4..3d55df1fc1d7 100755
--- a/devel/syscall-list
+++ b/devel/syscall-list
@@ -1,4 +1,4 @@
-# Copyright 2021 all contributors <meta@public-inbox.org>
+# Copyright all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt>
 # Dump syscall numbers under Linux and any other kernel which
 # promises stable syscall numbers.  This is to maintain
@@ -9,7 +9,10 @@
 eval 'exec perl -S $0 ${1+"$@"}' # no shebang
 	if 0; # running under some shell
 use strict;
+use v5.10.1;
 use File::Temp 0.19;
+use POSIX qw(uname);
+say '$machine='.(POSIX::uname())[-1];
 my $cc = $ENV{CC} // 'cc';
 my @cflags = split(/\s+/, $ENV{CFLAGS} // '-Wall');
 my $str = do { local $/; <DATA> };
@@ -43,6 +46,9 @@ int main(void)
 	D(SYS_inotify_add_watch);
 	D(SYS_inotify_rm_watch);
 	D(SYS_prctl);
+#ifdef SYS_renameat2
+	D(SYS_renameat2);
+#endif
 #endif /* Linux, any other OSes with stable syscalls? */
 	printf("size_t=%zu off_t=%zu\n", sizeof(size_t), sizeof(off_t));
 	return 0;
diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm
index 0b65c2762633..ceeef7f21d54 100644
--- a/lib/PublicInbox/LeiExportKw.pm
+++ b/lib/PublicInbox/LeiExportKw.pm
@@ -7,6 +7,7 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 use Errno qw(EEXIST ENOENT);
+use PublicInbox::Syscall qw(rename_noreplace);
 
 sub export_kw_md { # LeiMailSync->each_src callback
 	my ($oidbin, $id, $self, $mdir) = @_;
@@ -30,30 +31,22 @@ sub export_kw_md { # LeiMailSync->each_src callback
 	my $lei = $self->{lei};
 	for my $d (@try) {
 		my $src = "$mdir/$d/$$id";
-
-		# we use link(2) + unlink(2) since rename(2) may
-		# inadvertently clobber if the "uniquefilename" part wasn't
-		# actually unique.
-		if (link($src, $dst)) { # success
-			# unlink(2) may ENOENT from parallel invocation,
-			# ignore it, but not other serious errors
-			if (!unlink($src) and $! != ENOENT) {
-				$lei->child_error(1, "E: unlink($src): $!");
-			}
+		if (rename_noreplace($src, $dst)) { # success
 			$self->{lms}->mv_src("maildir:$mdir",
 						$oidbin, $id, $bn);
-			return; # success anyways if link(2) worked
+			return; # success
 		} elsif ($! == EEXIST) { # lost race with lei/store?
 			return;
 		} elsif ($! != ENOENT) {
-			$lei->child_error(1, "E: link($src -> $dst): $!");
+			$lei->child_error(1,
+				"E: rename_noreplace($src -> $dst): $!");
 		} # else loop @try
 	}
 	my $e = $!;
 	# both tries failed
 	my $oidhex = unpack('H*', $oidbin);
 	my $src = "$mdir/{".join(',', @try)."}/$$id";
-	$lei->child_error(1, "link($src -> $dst) ($oidhex): $e");
+	$lei->child_error(1, "rename_noreplace($src -> $dst) ($oidhex): $e");
 	for (@try) { return if -e "$mdir/$_/$$id" }
 	$self->{lms}->clear_src("maildir:$mdir", $id);
 }
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 16e7d302dc2f..f1316229bb32 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -32,6 +32,7 @@ use POSIX ();
 use IO::Handle (); # ->autoflush
 use Sys::Syslog qw(syslog openlog);
 use Errno qw(EEXIST ENOENT);
+use PublicInbox::Syscall qw(rename_noreplace);
 
 sub new {
 	my (undef, $dir, $opt) = @_;
@@ -185,10 +186,7 @@ sub export1_kw_md ($$$$$) {
 	my $dst = "$mdir/cur/$bn";
 	for my $d (@try) {
 		my $src = "$mdir/$d/$orig";
-		if (link($src, $dst)) {
-			if (!unlink($src) and $! != ENOENT) {
-				syslog('warning', "unlink($src): $!");
-			}
+		if (rename_noreplace($src, $dst)) {
 			# TODO: verify oidbin?
 			$self->{lms}->mv_src("maildir:$mdir",
 					$oidbin, \$orig, $bn);
@@ -196,7 +194,7 @@ sub export1_kw_md ($$$$$) {
 		} elsif ($! == EEXIST) { # lost race with "lei export-kw"?
 			return;
 		} elsif ($! != ENOENT) {
-			syslog('warning', "link($src -> $dst): $!");
+			syslog('warning', "rename_noreplace($src -> $dst): $!");
 		}
 	}
 	for (@try) { return if -e "$mdir/$_/$orig" };
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index ca4e92de48b7..d33d27aec006 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -12,6 +12,7 @@ use PublicInbox::Spawn qw(spawn);
 use Symbol qw(gensym);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
+use PublicInbox::Syscall qw(rename_noreplace);
 
 my %kw2char = ( # Maildir characters
 	draft => 'D',
@@ -262,10 +263,8 @@ sub _buf2maildir ($$$$) {
 		$rand = '';
 		do {
 			$base = $rand.$common.':2,'.kw2suffix($kw);
-		} while (!($ok = link($tmp, $dst.$base)) && $!{EEXIST} &&
-			($rand = _rand.','));
-		die "link($tmp, $dst$base): $!" unless $ok;
-		unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
+		} while (!($ok = rename_noreplace($tmp, $dst.$base)) &&
+			$!{EEXIST} && ($rand = _rand.','));
 		\$base;
 	} else {
 		my $err = "Error writing $smsg->{blob} to $dst: $!\n";
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index 7ab4291119ea..c00385b94db8 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -13,8 +13,9 @@
 # License or the Artistic License, as specified in the Perl README file.
 package PublicInbox::Syscall;
 use strict;
+use v5.10.1;
 use parent qw(Exporter);
-use POSIX qw(ENOSYS O_NONBLOCK);
+use POSIX qw(ENOENT EEXIST ENOSYS O_NONBLOCK);
 use Config;
 
 # $VERSION = '0.25'; # Sys::Syscall version
@@ -22,7 +23,7 @@ our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
                   EPOLLIN EPOLLOUT EPOLLET
                   EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
                   EPOLLONESHOT EPOLLEXCLUSIVE
-                  signalfd);
+                  signalfd rename_noreplace);
 our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
                              EPOLLIN EPOLLOUT
                              EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
@@ -64,13 +65,16 @@ our (
      $SYS_epoll_ctl,
      $SYS_epoll_wait,
      $SYS_signalfd4,
+     $SYS_renameat2,
      );
 
 my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
 our $no_deprecated = 0;
 
 if ($^O eq "linux") {
-    my $machine = (POSIX::uname())[-1];
+    my (undef, undef, $release, undef, $machine) = POSIX::uname();
+    my ($maj, $min) = ($release =~ /\A([0-9]+)\.([0-9]+)/);
+    $SYS_renameat2 = 0 if "$maj.$min" < 3.15;
     # whether the machine requires 64-bit numbers to be on 8-byte
     # boundaries.
     my $u64_mod_8 = 0;
@@ -91,22 +95,26 @@ if ($^O eq "linux") {
         $SYS_epoll_ctl    = 255;
         $SYS_epoll_wait   = 256;
         $SYS_signalfd4 = 327;
+        $SYS_renameat2 //= 353;
     } elsif ($machine eq "x86_64") {
         $SYS_epoll_create = 213;
         $SYS_epoll_ctl    = 233;
         $SYS_epoll_wait   = 232;
         $SYS_signalfd4 = 289;
+	$SYS_renameat2 //= 316;
     } elsif ($machine eq 'x32') {
         $SYS_epoll_create = 1073742037;
         $SYS_epoll_ctl = 1073742057;
         $SYS_epoll_wait = 1073742056;
         $SYS_signalfd4 = 1073742113;
+	$SYS_renameat2 //= 0x40000000 + 316;
     } elsif ($machine eq 'sparc64') {
 	$SYS_epoll_create = 193;
 	$SYS_epoll_ctl = 194;
 	$SYS_epoll_wait = 195;
 	$u64_mod_8 = 1;
 	$SYS_signalfd4 = 317;
+	$SYS_renameat2 //= 345;
 	$SFD_CLOEXEC = 020000000;
     } elsif ($machine =~ m/^parisc/) {
         $SYS_epoll_create = 224;
@@ -120,18 +128,21 @@ if ($^O eq "linux") {
         $SYS_epoll_wait   = 238;
         $u64_mod_8        = 1;
         $SYS_signalfd4 = 313;
+	$SYS_renameat2 //= 357;
     } elsif ($machine eq "ppc") {
         $SYS_epoll_create = 236;
         $SYS_epoll_ctl    = 237;
         $SYS_epoll_wait   = 238;
         $u64_mod_8        = 1;
         $SYS_signalfd4 = 313;
+	$SYS_renameat2 //= 357;
     } elsif ($machine =~ m/^s390/) {
         $SYS_epoll_create = 249;
         $SYS_epoll_ctl    = 250;
         $SYS_epoll_wait   = 251;
         $u64_mod_8        = 1;
         $SYS_signalfd4 = 322;
+	$SYS_renameat2 //= 347;
     } elsif ($machine eq "ia64") {
         $SYS_epoll_create = 1243;
         $SYS_epoll_ctl    = 1244;
@@ -153,6 +164,7 @@ if ($^O eq "linux") {
         $u64_mod_8        = 1;
         $no_deprecated    = 1;
         $SYS_signalfd4 = 74;
+	$SYS_renameat2 //= 276;
     } elsif ($machine =~ m/arm(v\d+)?.*l/) {
         # ARM OABI
         $SYS_epoll_create = 250;
@@ -160,18 +172,21 @@ if ($^O eq "linux") {
         $SYS_epoll_wait   = 252;
         $u64_mod_8        = 1;
         $SYS_signalfd4 = 355;
+	$SYS_renameat2 //= 382;
     } elsif ($machine =~ m/^mips64/) {
         $SYS_epoll_create = 5207;
         $SYS_epoll_ctl    = 5208;
         $SYS_epoll_wait   = 5209;
         $u64_mod_8        = 1;
         $SYS_signalfd4 = 5283;
+	$SYS_renameat2 //= 5311;
     } elsif ($machine =~ m/^mips/) {
         $SYS_epoll_create = 4248;
         $SYS_epoll_ctl    = 4249;
         $SYS_epoll_wait   = 4250;
         $u64_mod_8        = 1;
         $SYS_signalfd4 = 4324;
+	$SYS_renameat2 //= 4351;
     } else {
         # as a last resort, try using the *.ph files which may not
         # exist or may be wrong
@@ -280,6 +295,34 @@ sub signalfd ($$) {
 	}
 }
 
+sub _rename_noreplace_racy ($$) {
+	my ($old, $new) = @_;
+	if (link($old, $new)) {
+		warn "unlink $old: $!\n" if !unlink($old) && $! != ENOENT;
+		1
+	} else {
+		undef;
+	}
+}
+
+# TODO: support FD args?
+sub rename_noreplace ($$) {
+	my ($old, $new) = @_;
+	if ($SYS_renameat2) { # RENAME_NOREPLACE = 1, AT_FDCWD = -100
+		my $ret = syscall($SYS_renameat2, -100, $old, -100, $new, 1);
+		if ($ret == 0) {
+			1; # like rename() perlop
+		} elsif ($! == ENOSYS) {
+			undef $SYS_renameat2;
+			_rename_noreplace_racy($old, $new);
+		} else {
+			undef
+		}
+	} else {
+		_rename_noreplace_racy($old, $new);
+	}
+}
+
 1;
 
 =head1 WARRANTY
diff --git a/t/rename_noreplace.t b/t/rename_noreplace.t
new file mode 100644
index 000000000000..bd1c4e9236a7
--- /dev/null
+++ b/t/rename_noreplace.t
@@ -0,0 +1,26 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use PublicInbox::TestCommon;
+use_ok 'PublicInbox::Syscall', 'rename_noreplace';
+my ($tmpdir, $for_destroy) = tmpdir;
+
+open my $fh, '>', "$tmpdir/a" or xbail $!;
+my @sa = stat($fh);
+is(rename_noreplace("$tmpdir/a", "$tmpdir/b"), 1, 'rename_noreplace');
+my @sb = stat("$tmpdir/b");
+ok(scalar(@sb), 'new file exists');
+ok(!-e "$tmpdir/a", 'original gone');
+is("@sa[0,1]", "@sb[0,1]", 'same st_dev + st_ino');
+
+is(rename_noreplace("$tmpdir/a", "$tmpdir/c"), undef, 'undef on ENOENT');
+ok($!{ENOENT}, 'ENOENT set when missing');
+
+open $fh, '>', "$tmpdir/a" or xbail $!;
+is(rename_noreplace("$tmpdir/a", "$tmpdir/b"), undef, 'undef on EEXIST');
+ok($!{EEXIST}, 'EEXIST set when missing');
+is_deeply([stat("$tmpdir/b")], \@sb, 'target unchanged on EEXIST');
+
+done_testing;

      parent reply	other threads:[~2021-10-21 21:10 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-21 21:10 [PATCH 00/15] use RENAME_NOREPLACE on Linux 3.15+ Eric Wong
2021-10-21 21:10 ` [PATCH 01/15] t/lei-{auto-watch,export-kw}: extra diagnostics on failure Eric Wong
2021-10-21 21:10 ` [PATCH 02/15] t/lei-import-maildir: rename fix (SR -> RS) Eric Wong
2021-10-21 21:10 ` [PATCH 03/15] t/lei-p2q: extra diagnostics Eric Wong
2023-09-21 10:23   ` Eric Wong
2021-10-21 21:10 ` [PATCH 04/15] lei/store: check for any unexpected process death Eric Wong
2021-10-21 21:10 ` [PATCH 05/15] lei note-event: drop unnecessary eval guard Eric Wong
2021-10-21 21:10 ` [PATCH 06/15] lei note-event: wq_io_do => wq_do Eric Wong
2021-10-21 21:10 ` [PATCH 07/15] lei_search: try harder to associate "lei index"-ed messages Eric Wong
2021-10-21 21:10 ` [PATCH 08/15] watch: check for {quit} before IDLE Eric Wong
2021-10-21 21:10 ` [PATCH 09/15] watch: remove redundant signal mask manipulation Eric Wong
2021-10-21 21:10 ` [PATCH 10/15] doc: lei-overview: add CAVEATS section Eric Wong
2021-10-21 21:10 ` [PATCH 11/15] lei note-event: clear_src on ENOENT Eric Wong
2021-10-21 21:10 ` [PATCH 12/15] dir_idle: treat IN_MOVED_FROM as a gone event Eric Wong
2021-10-21 21:10 ` [PATCH 13/15] lei: no Perl FileHandle for `undef' w/ ECONNRESET Eric Wong
2021-10-21 21:10 ` [PATCH 14/15] lei_mail_sync: mv_src: use transaction, check UNIQUE Eric Wong
2021-10-21 21:10 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211021211032.22666-16-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).