user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 09/34] kqnotify|fake_inotify: detect Maildir write ops
Date: Sat, 27 Jun 2020 10:03:35 +0000	[thread overview]
Message-ID: <20200627100400.9871-10-e@yhbt.net> (raw)
In-Reply-To: <20200627100400.9871-1-e@yhbt.net>

We need to detect link(2) and rename(2) calls made by other
apps writing to the Maildir.

We'll be removing the Filesys::Notify::Simple dependency from
-watch in favor of using IO::KQueue or Linux::Inotify2 directly.
Ensure the non-inotify emulations (KQNotify and FakeInotify)
support everything we expect from Maildir writers.
---
 MANIFEST                       |  2 ++
 lib/PublicInbox/FakeInotify.pm | 46 ++++++++++++++++++++++++++++------
 lib/PublicInbox/KQNotify.pm    | 38 +++++++++++++++++++++++-----
 t/fake_inotify.t               | 45 +++++++++++++++++++++++++++++++++
 t/kqnotify.t                   | 41 ++++++++++++++++++++++++++++++
 5 files changed, 159 insertions(+), 13 deletions(-)
 create mode 100644 t/fake_inotify.t
 create mode 100644 t/kqnotify.t

diff --git a/MANIFEST b/MANIFEST
index 161b6cddbe0..9d1a4e4a8b1 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -253,6 +253,7 @@ t/eml_content_disposition.t
 t/eml_content_type.t
 t/epoll.t
 t/fail-bin/spamc
+t/fake_inotify.t
 t/feed.t
 t/filter_base-junk.eml
 t/filter_base-xhtml.eml
@@ -286,6 +287,7 @@ t/indexlevels-mirror-v1.t
 t/indexlevels-mirror.t
 t/init.t
 t/iso-2202-jp.eml
+t/kqnotify.t
 t/linkify.t
 t/main-bin/spamc
 t/mda-mime.eml
diff --git a/lib/PublicInbox/FakeInotify.pm b/lib/PublicInbox/FakeInotify.pm
index b077d63a4b4..df63173f083 100644
--- a/lib/PublicInbox/FakeInotify.pm
+++ b/lib/PublicInbox/FakeInotify.pm
@@ -6,10 +6,13 @@
 package PublicInbox::FakeInotify;
 use strict;
 use Time::HiRes qw(stat);
+use PublicInbox::DS;
 my $IN_CLOSE = 0x08 | 0x10; # match Linux inotify
+# my $IN_MOVED_TO = 0x80;
+# my $IN_CREATE = 0x100;
+sub MOVED_TO_OR_CREATE () { 0x80 | 0x100 }
 
 my $poll_intvl = 2; # same as Filesys::Notify::Simple
-my $for_cancel = bless \(my $x), 'PublicInbox::FakeInotify::Watch';
 
 sub poll_once {
 	my ($self) = @_;
@@ -30,8 +33,22 @@ sub new {
 sub watch {
 	my ($self, $path, $mask, $cb) = @_;
 	my @st = stat($path) or return;
-	$self->{watch}->{"$path\0$mask"} = [ @st, $cb ];
-	$for_cancel;
+	my $k = "$path\0$mask";
+	$self->{watch}->{$k} = [ $st[10], $cb ]; # 10 - ctime
+	bless [ $self->{watch}, $k ], 'PublicInbox::FakeInotify::Watch';
+}
+
+sub on_new_files ($$$$) {
+	my ($dh, $cb, $path, $old_ctime) = @_;
+	while (defined(my $base = readdir($dh))) {
+		next if $base =~ /\A\.\.?\z/;
+		my $full = "$path/$base";
+		my @st = stat($full);
+		if (@st && $st[10] > $old_ctime) {
+			bless \$full, 'PublicInbox::FakeInotify::Event';
+			eval { $cb->(\$full) };
+		}
+	}
 }
 
 # behaves like non-blocking Linux::Inotify2->poll
@@ -43,17 +60,32 @@ sub poll {
 		my @now = stat($path) or next;
 		my $prv = $watch->{$x};
 		my $cb = $prv->[-1];
-		# 10: ctime, 7: size
-		if ($prv->[10] != $now[10]) {
+		my $old_ctime = $prv->[0];
+		if ($old_ctime != $now[10]) {
 			if (($mask & $IN_CLOSE) == $IN_CLOSE) {
 				eval { $cb->() };
+			} elsif ($mask & MOVED_TO_OR_CREATE) {
+				opendir(my $dh, $path) or do {
+					warn "W: opendir $path: $!\n";
+					next;
+				};
+				on_new_files($dh, $cb, $path, $old_ctime);
 			}
 		}
-		@$prv = (@now, $cb);
+		@$prv = ($now[10], $cb);
 	}
 }
 
 package PublicInbox::FakeInotify::Watch;
-sub cancel {} # noop
+use strict;
+
+sub cancel {
+	my ($self) = @_;
+	delete $self->[0]->{$self->[1]};
+}
+
+package PublicInbox::FakeInotify::Event;
+use strict;
 
+sub fullname { ${$_[0]} }
 1;
diff --git a/lib/PublicInbox/KQNotify.pm b/lib/PublicInbox/KQNotify.pm
index 110594cc02c..9673b44290a 100644
--- a/lib/PublicInbox/KQNotify.pm
+++ b/lib/PublicInbox/KQNotify.pm
@@ -7,6 +7,11 @@ package PublicInbox::KQNotify;
 use strict;
 use IO::KQueue;
 use PublicInbox::DSKQXS; # wraps IO::KQueue for fork-safe DESTROY
+use PublicInbox::FakeInotify;
+use Time::HiRes qw(stat);
+
+# NOTE_EXTEND detects rename(2), NOTE_WRITE detects link(2)
+sub MOVED_TO_OR_CREATE () { NOTE_EXTEND|NOTE_WRITE }
 
 sub new {
 	my ($class) = @_;
@@ -15,19 +20,28 @@ sub new {
 
 sub watch {
 	my ($self, $path, $mask, $cb) = @_;
-	open(my $fh, '<', $path) or return;
+	my ($fh, $cls, @extra);
+	if (-d $path) {
+		opendir($fh, $path) or return;
+		my @st = stat($fh);
+		@extra = ($path, $st[10]); # 10: ctime
+		$cls = 'PublicInbox::KQNotify::Watchdir';
+	} else {
+		open($fh, '<', $path) or return;
+		$cls = 'PublicInbox::KQNotify::Watch';
+	}
 	my $ident = fileno($fh);
 	$self->{dskq}->{kq}->EV_SET($ident, # ident
 		EVFILT_VNODE, # filter
 		EV_ADD | EV_CLEAR, # flags
 		$mask, # fflags
 		0, 0); # data, udata
-	if ($mask == NOTE_WRITE) {
-		$self->{watch}->{$ident} = [ $fh, $cb ];
+	if ($mask == NOTE_WRITE || $mask == MOVED_TO_OR_CREATE) {
+		$self->{watch}->{$ident} = [ $fh, $cb, @extra ];
 	} else {
 		die "TODO Not implemented: $mask";
 	}
-	bless \$fh, 'PublicInbox::KQNotify::Watch';
+	bless \$fh, $cls;
 }
 
 # emulate Linux::Inotify::fileno
@@ -48,8 +62,15 @@ sub poll {
 	for my $kev (@kevents) {
 		my $ident = $kev->[KQ_IDENT];
 		my $mask = $kev->[KQ_FFLAGS];
-		if (($mask & NOTE_WRITE) == NOTE_WRITE) {
-			eval { $self->{watch}->{$ident}->[1]->() };
+		my ($dh, $cb, $path, $old_ctime) = @{$self->{watch}->{$ident}};
+		if (!defined($path) && ($mask & NOTE_WRITE) == NOTE_WRITE) {
+			eval { $cb->() };
+		} elsif ($mask & MOVED_TO_OR_CREATE) {
+			my @new_st = stat($path) or next;
+			$self->{watch}->{$ident}->[3] = $new_st[10]; # ctime
+			rewinddir($dh);
+			PublicInbox::FakeInotify::on_new_files($dh, $cb,
+							$path, $old_ctime);
 		}
 	}
 }
@@ -59,4 +80,9 @@ use strict;
 
 sub cancel { close ${$_[0]} or die "close: $!" }
 
+package PublicInbox::KQNotify::Watchdir;
+use strict;
+
+sub cancel { closedir ${$_[0]} or die "closedir: $!" }
+
 1;
diff --git a/t/fake_inotify.t b/t/fake_inotify.t
new file mode 100644
index 00000000000..f0db0cb58ec
--- /dev/null
+++ b/t/fake_inotify.t
@@ -0,0 +1,45 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Ensure FakeInotify can pick up rename(2) and link(2) operations
+# used by Maildir writing tools
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+use_ok 'PublicInbox::FakeInotify';
+my $MIN_FS_TICK = 0.011; # for low-res CONFIG_HZ=100 systems
+my ($tmpdir, $for_destroy) = tmpdir();
+mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!";
+open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!";
+close $fh or BAIL_OUT "close: $!";
+
+my $fi = PublicInbox::FakeInotify->new;
+my $mask = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE();
+my $hit = [];
+my $cb = sub { push @$hit, map { $_->fullname } @_ };
+my $w = $fi->watch("$tmpdir/new", $mask, $cb);
+
+select undef, undef, undef, $MIN_FS_TICK;
+rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!";
+$fi->poll;
+is_deeply($hit, ["$tmpdir/new/tst"], 'rename(2) detected');
+
+@$hit = ();
+select undef, undef, undef, $MIN_FS_TICK;
+open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!";
+close $fh or BAIL_OUT "close: $!";
+link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!";
+$fi->poll;
+is_deeply($hit, ["$tmpdir/new/link"], 'link(2) detected');
+
+$w->cancel;
+@$hit = ();
+select undef, undef, undef, $MIN_FS_TICK;
+link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!";
+$fi->poll;
+is_deeply($hit, [], 'link(2) not detected after cancel');
+
+PublicInbox::DS->Reset;
+
+done_testing;
diff --git a/t/kqnotify.t b/t/kqnotify.t
new file mode 100644
index 00000000000..b3414b8ae33
--- /dev/null
+++ b/t/kqnotify.t
@@ -0,0 +1,41 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Ensure KQNotify can pick up rename(2) and link(2) operations
+# used by Maildir writing tools
+use strict;
+use Test::More;
+use PublicInbox::TestCommon;
+plan skip_all => 'KQNotify is only for *BSD systems' if $^O !~ /bsd/;
+require_mods('IO::KQueue');
+use_ok 'PublicInbox::KQNotify';
+my ($tmpdir, $for_destroy) = tmpdir();
+mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!";
+open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!";
+close $fh or BAIL_OUT "close: $!";
+
+my $kqn = PublicInbox::KQNotify->new;
+my $mask = PublicInbox::KQNotify::MOVED_TO_OR_CREATE();
+my $hit = [];
+my $cb = sub { push @$hit, map { $_->fullname } @_ };
+my $w = $kqn->watch("$tmpdir/new", $mask, $cb);
+
+rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!";
+$kqn->poll;
+is_deeply($hit, ["$tmpdir/new/tst"], 'rename(2) detected (via NOTE_EXTEND)');
+
+@$hit = ();
+open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!";
+close $fh or BAIL_OUT "close: $!";
+link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!";
+$kqn->poll;
+is_deeply($hit, ["$tmpdir/new/link"], 'link(2) detected (via NOTE_WRITE)');
+
+$w->cancel;
+@$hit = ();
+link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!";
+$kqn->poll;
+is_deeply($hit, [], 'link(2) not detected after cancel');
+
+done_testing;

  parent reply	other threads:[~2020-06-27 10:04 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-27 10:03 [PATCH 00/34] watch: add IMAP and NNTP support Eric Wong
2020-06-27 10:03 ` [PATCH 01/34] inboxwritable: ensure ssoma.lock exists on init Eric Wong
2020-06-27 10:03 ` [PATCH 02/34] inbox: warn on ->on_inbox_unlock exception Eric Wong
2020-06-27 10:03 ` [PATCH 03/34] IMAPTracker: Add a helper to track our place in reading imap mailboxes Eric Wong
2020-06-27 10:03 ` [PATCH 04/34] imaptracker: use ~/.local/share/public-inbox/imap.sqlite3 Eric Wong
2020-06-27 10:03 ` [PATCH 05/34] watchmaildir: hoist out compile_watchheaders Eric Wong
2020-06-27 10:03 ` [PATCH 06/34] watchmaildir: fix check for spam vs ham inbox conflicts Eric Wong
2020-06-27 10:03 ` [PATCH 07/34] URI IMAP support Eric Wong
2020-06-27 10:03 ` [PATCH 08/34] watch: preliminary " Eric Wong
2020-06-27 10:03 ` Eric Wong [this message]
2020-06-27 10:03 ` [PATCH 10/34] watch: remove Filesys::Notify::Simple dependency Eric Wong
2020-06-27 10:03 ` [PATCH 11/34] watch: use signalfd for Maildir watching Eric Wong
2020-06-27 19:05   ` Kyle Meyer
2020-06-27 22:32     ` Eric Wong
2020-06-27 10:03 ` [PATCH 12/34] ds: remove fields.pm usage Eric Wong
2020-06-27 10:03 ` [PATCH 13/34] watch: wire up IMAP IDLE reapers to DS Eric Wong
2020-06-27 10:03 ` [PATCH 14/34] watch: support IMAP polling Eric Wong
2020-06-27 10:03 ` [PATCH 15/34] config: support ->urlmatch method for -watch Eric Wong
2020-06-27 10:03 ` [PATCH 16/34] watch: stop importers before forking Eric Wong
2020-06-27 10:03 ` [PATCH 17/34] watch: use UID SEARCH to avoid empty UID FETCH Eric Wong
2020-06-27 10:03 ` [PATCH 18/34] ds: add_timer: allow passing arg to callback Eric Wong
2020-06-27 10:03 ` [PATCH 19/34] imaptracker: add {url} field to reduce args Eric Wong
2020-06-27 10:03 ` [PATCH 20/34] imaptracker: drop {dbname} field Eric Wong
2020-06-27 10:03 ` [PATCH 21/34] watch: avoid long transaction to IMAPTracker Eric Wong
2020-06-27 10:03 ` [PATCH 22/34] watch: support imap.fetchBatchSize parameter Eric Wong
2020-06-27 10:03 ` [PATCH 23/34] watch: imap: be quiet about disconnecting on quit Eric Wong
2020-06-27 10:03 ` [PATCH 24/34] watch: support multiple watch: directives per-inbox Eric Wong
2020-06-27 10:03 ` [PATCH 25/34] watch: remove {mdir} array Eric Wong
2020-06-27 10:03 ` [PATCH 26/34] watch: just use ->urlmatch Eric Wong
2020-06-27 10:03 ` [PATCH 27/34] testcommon: $ENV{TAIL} supports non-@ARGV redirects Eric Wong
2020-06-27 10:03 ` [PATCH 28/34] watch: add NNTP support Eric Wong
2020-06-27 19:06   ` Kyle Meyer
2020-06-27 10:03 ` [PATCH 29/34] watch: show user-specified URL consistently Eric Wong
2020-06-27 10:03 ` [PATCH 30/34] watch: enable autoflush for STDOUT and STDERR Eric Wong
2020-06-27 10:03 ` [PATCH 31/34] watch: use our own "git credential" wrapper Eric Wong
2020-06-27 10:03 ` [PATCH 32/34] watch: support ~/.netrc via Net::Netrc Eric Wong
2020-06-27 10:03 ` [PATCH 33/34] imaptracker: use flock(2) around writes Eric Wong
2020-06-27 10:04 ` [PATCH 34/34] watch: simplify internal structures Eric Wong
2020-06-29 10:34 ` [PATCH 0/5] watch: Maildir fixes Eric Wong
2020-06-29 10:34   ` [PATCH 1/5] watch: check for duplicates in ->over before spamcheck Eric Wong
2020-06-29 10:34   ` [PATCH 2/5] watch: show path for warnings from spam messages Eric Wong
2020-06-29 10:34   ` [PATCH 3/5] watch: ensure SIGCHLD works in forked children Eric Wong
2020-06-29 10:34   ` [PATCH 4/5] spawn: unblock SIGCHLD in subprocess Eric Wong
2020-07-07  6:17     ` [PATCH 6/5] t/spawn: fix test reliability Eric Wong
2020-06-29 10:34   ` [PATCH 5/5] watch: make waitpid() synchronous for Maildir scans Eric Wong
2020-06-29 10:37     ` Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200627100400.9871-10-e@yhbt.net \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 09/34] kqnotify|fake_inotify: detect Maildir write ops' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git