user/dev discussion of public-inbox itself
 help / color / Atom feed
* [PATCH] inboxidle: new class to detect inbox changes
@ 2020-05-21  7:15 Eric Wong
  2020-05-21  9:37 ` Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2020-05-21  7:15 UTC (permalink / raw)
  To: meta

This will be used to implement IMAP IDLE, first.

Eventually, it may be used to trigger other things:

* incremental internal updates for manifest.js.gz
* restart `git cat-file' processes on pack index unlink
* IMAP IDLE-like long-polling HTTP endpoint

And maybe more things we haven't thought of, yet.

It uses Linux::Inotify2 or IO::KQueue depending on what packages
are installed and what the kernel supports.  It falls back to
nanosecond-aware Time::HiRes::stat() (available with Perl 5.10.0+)
on systems lacking Linux::Inotify2 and IO::KQueue.

In the future, a pure Perl alternative to Linux::Inotify2 may be
supplied for users on architectures where we already support
signalfd and epoll.
---
 MANIFEST                       |  5 +++
 lib/PublicInbox/FakeInotify.pm | 39 ++++++++++++++++++
 lib/PublicInbox/In2Tie.pm      | 17 ++++++++
 lib/PublicInbox/Inbox.pm       | 20 ++++++++++
 lib/PublicInbox/InboxIdle.pm   | 55 ++++++++++++++++++++++++++
 lib/PublicInbox/KQNotify.pm    | 60 ++++++++++++++++++++++++++++
 lib/PublicInbox/Lock.pm        |  7 +++-
 t/inbox_idle.t                 | 72 ++++++++++++++++++++++++++++++++++
 8 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100644 lib/PublicInbox/FakeInotify.pm
 create mode 100644 lib/PublicInbox/In2Tie.pm
 create mode 100644 lib/PublicInbox/InboxIdle.pm
 create mode 100644 lib/PublicInbox/KQNotify.pm
 create mode 100644 t/inbox_idle.t

diff --git a/MANIFEST b/MANIFEST
index 24f95faa..9aaf9a2d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -108,6 +108,7 @@ lib/PublicInbox/Emergency.pm
 lib/PublicInbox/Eml.pm
 lib/PublicInbox/EmlContentFoo.pm
 lib/PublicInbox/ExtMsg.pm
+lib/PublicInbox/FakeInotify.pm
 lib/PublicInbox/Feed.pm
 lib/PublicInbox/Filter/Base.pm
 lib/PublicInbox/Filter/Gmane.pm
@@ -125,8 +126,11 @@ lib/PublicInbox/HTTPD/Async.pm
 lib/PublicInbox/HlMod.pm
 lib/PublicInbox/Hval.pm
 lib/PublicInbox/Import.pm
+lib/PublicInbox/In2Tie.pm
 lib/PublicInbox/Inbox.pm
+lib/PublicInbox/InboxIdle.pm
 lib/PublicInbox/InboxWritable.pm
+lib/PublicInbox/KQNotify.pm
 lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
@@ -259,6 +263,7 @@ t/httpd.t
 t/hval.t
 t/import.t
 t/inbox.t
+t/inbox_idle.t
 t/index-git-times.t
 t/indexlevels-mirror-v1.t
 t/indexlevels-mirror.t
diff --git a/lib/PublicInbox/FakeInotify.pm b/lib/PublicInbox/FakeInotify.pm
new file mode 100644
index 00000000..bd610463
--- /dev/null
+++ b/lib/PublicInbox/FakeInotify.pm
@@ -0,0 +1,39 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for systems lacking Linux::Inotify2 or IO::KQueue, just emulates
+# enough of Linux::Inotify2
+package PublicInbox::FakeInotify;
+use strict;
+use Time::HiRes qw(stat);
+my $IN_CLOSE = 0x08 | 0x10; # match Linux inotify
+
+sub new { bless { watch => {} }, __PACKAGE__ }
+
+# behaves like Linux::Inotify2->watch
+sub watch {
+	my ($self, $path, $mask, $cb) = @_;
+	my @st = stat($path) or return;
+	$self->{watch}->{"$path\0$mask"} = [ @st, $cb ];
+}
+
+# behaves like non-blocking Linux::Inotify2->poll
+sub poll {
+	my ($self) = @_;
+	my $watch = $self->{watch} or return;
+	for my $x (keys %$watch) {
+		my ($path, $mask) = split(/\0/, $x, 2);
+		my @now = stat($path) or next;
+		my $prv = $watch->{$x};
+		my $cb = $prv->[-1];
+		# 10: ctime, 7: size
+		if ($prv->[10] != $now[10]) {
+			if (($mask & $IN_CLOSE) == $IN_CLOSE) {
+				eval { $cb->() };
+			}
+		}
+		@$prv = (@now, $cb);
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/In2Tie.pm b/lib/PublicInbox/In2Tie.pm
new file mode 100644
index 00000000..db1dc104
--- /dev/null
+++ b/lib/PublicInbox/In2Tie.pm
@@ -0,0 +1,17 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# used to ensure PublicInbox::DS can call fileno() as a function
+# on Linux::Inotify2 objects
+package PublicInbox::In2Tie;
+use strict;
+
+sub TIEHANDLE {
+	my ($class, $in2) = @_;
+	bless \$in2, $class; # a scalar reference to an existing reference
+}
+
+# this calls Linux::Inotify2::fileno
+sub FILENO { ${$_[0]}->fileno }
+
+1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 002b980f..2f83b9ae 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -390,4 +390,24 @@ sub altid_map ($) {
 	} // {};
 }
 
+# $obj must respond to ->on_inbox_unlock, which takes Inbox ($self) as an arg
+sub subscribe_unlock {
+	my ($self, $ident, $obj) = @_;
+	$self->{over_subs}->{$ident} = $obj;
+}
+
+sub unsubscribe_unlock {
+	my ($self, $ident) = @_;
+	delete $self->{over_subs}->{$ident};
+}
+
+# called by inotify
+sub on_unlock {
+	my ($self) = @_;
+	my $subs = $self->{over_subs} or return;
+	for (values %$subs) {
+		eval { $_->on_inbox_unlock($self) };
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/InboxIdle.pm b/lib/PublicInbox/InboxIdle.pm
new file mode 100644
index 00000000..095a801c
--- /dev/null
+++ b/lib/PublicInbox/InboxIdle.pm
@@ -0,0 +1,55 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::InboxIdle;
+use strict;
+use base qw(PublicInbox::DS);
+use fields qw(pi_config inot);
+use Symbol qw(gensym);
+use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
+my $IN_CLOSE = 0x08 | 0x10; # match Linux inotify
+my $ino_cls;
+if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) {
+	$IN_CLOSE = Linux::Inotify2::IN_CLOSE();
+	$ino_cls = 'Linux::Inotify2';
+} elsif (eval { require PublicInbox::KQNotify }) {
+	$IN_CLOSE = PublicInbox::KQNotify::IN_CLOSE();
+	$ino_cls = 'PublicInbox::KQNotify';
+}
+require PublicInbox::In2Tie if $ino_cls;
+
+sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
+	my ($ibx, $inot) = @_;
+	my $path = "$ibx->{inboxdir}/";
+	$path .= $ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock';
+	$inot->watch($path, $IN_CLOSE, sub { $ibx->on_unlock });
+	# TODO: detect deleted packs (and possibly other files)
+}
+
+sub new {
+	my ($class, $pi_config) = @_;
+	my $self = fields::new($class);
+	my $inot;
+	if ($ino_cls) {
+		$inot = $ino_cls->new or die "E: $ino_cls->new: $!";
+		my $sock = gensym;
+		tie *$sock, 'PublicInbox::In2Tie', $inot;
+		$inot->blocking(0);
+		$inot->on_overflow(undef); # broadcasts everything on overflow
+		$self->SUPER::new($sock, EPOLLIN | EPOLLET);
+	} else {
+		require PublicInbox::FakeInotify;
+		$inot = PublicInbox::FakeInotify->new;
+	}
+	$self->{inot} = $inot;
+	$pi_config->each_inbox(\&in2_arm, $inot);
+	$self;
+}
+
+sub event_step {
+	my ($self) = @_;
+	eval { $self->{inot}->poll }; # Linux::Inotify2::poll
+	warn "$self->{inot}->poll err: $@\n" if $@;
+}
+
+1;
diff --git a/lib/PublicInbox/KQNotify.pm b/lib/PublicInbox/KQNotify.pm
new file mode 100644
index 00000000..3cf9c0f5
--- /dev/null
+++ b/lib/PublicInbox/KQNotify.pm
@@ -0,0 +1,60 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# implements the small subset of Linux::Inotify2 functionality we use
+# using IO::KQueue on *BSD systems.
+package PublicInbox::KQNotify;
+use strict;
+use IO::KQueue;
+use PublicInbox::DSKQXS; # wraps IO::KQueue for fork-safe DESTROY
+
+# only true as far as public-inbox is concerned with .lock files:
+sub IN_CLOSE () { NOTE_WRITE }
+#sub IN_CLOSE () { 0x200 } # NOTE_CLOSE_WRITE (FreeBSD 11+ only)
+
+sub new {
+	my ($class) = @_;
+	bless { dskq => PublicInbox::DSKQXS->new, watch => {} }, $class;
+}
+
+sub watch {
+	my ($self, $path, $mask, $cb) = @_;
+	open(my $fh, '<', $path) or return;
+	my $ident = fileno($fh);
+	$self->{dskq}->{kq}->EV_SET($ident, # ident
+		EVFILT_VNODE, # filter
+		EV_ADD | EV_CLEAR, # flags
+		$mask, # fflags
+		0, 0); # data, udata
+	if ($mask == IN_CLOSE) {
+		$self->{watch}->{$ident} = [ $fh, $cb ];
+	} else {
+		die "TODO Not implemented: $mask";
+	}
+}
+
+# emulate Linux::Inotify2::fileno
+sub fileno { ${$_[0]->{dskq}->{kq}} }
+
+# noop for Linux::Inotify2 compatibility.  Unlike inotify,
+# kqueue doesn't seem to overflow since it's limited by the number of
+# open FDs the process has
+sub on_overflow {}
+
+# noop for Linux::Inotify2 compatibility, we use `0' timeout for ->kevent
+sub blocking {}
+
+# behave like Linux::Inotify2::poll
+sub poll {
+	my ($self) = @_;
+	my @kevents = $self->{dskq}->{kq}->kevent(0);
+	for my $kev (@kevents) {
+		my $ident = $kev->[KQ_IDENT];
+		my $mask = $kev->[KQ_FFLAGS];
+		if (($mask & IN_CLOSE) == IN_CLOSE) {
+			eval { $self->{watch}->{$ident}->[1]->() };
+		}
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm
index 032841ed..5a55c9d3 100644
--- a/lib/PublicInbox/Lock.pm
+++ b/lib/PublicInbox/Lock.pm
@@ -14,7 +14,7 @@ sub lock_acquire {
 	my ($self) = @_;
 	croak 'already locked' if $self->{lockfh};
 	my $lock_path = $self->{lock_path} or return;
-	sysopen(my $lockfh, $lock_path, O_WRONLY|O_CREAT) or
+	sysopen(my $lockfh, $lock_path, O_TRUNC|O_WRONLY|O_CREAT) or
 		die "failed to open lock $lock_path: $!\n";
 	flock($lockfh, LOCK_EX) or die "lock failed: $!\n";
 	$self->{lockfh} = $lockfh;
@@ -24,6 +24,11 @@ sub lock_release {
 	my ($self) = @_;
 	return unless $self->{lock_path};
 	my $lockfh = delete $self->{lockfh} or croak 'not locked';
+
+	# NetBSD 8.1 and OpenBSD 6.5 (and maybe other versions/*BSDs) lack
+	# NOTE_CLOSE_WRITE from FreeBSD 11+, so trigger NOTE_WRITE, instead
+	syswrite($lockfh, '.') if $^O ne 'linux';
+
 	flock($lockfh, LOCK_UN) or die "unlock failed: $!\n";
 	close $lockfh or die "close failed: $!\n";
 }
diff --git a/t/inbox_idle.t b/t/inbox_idle.t
new file mode 100644
index 00000000..6bd56113
--- /dev/null
+++ b/t/inbox_idle.t
@@ -0,0 +1,72 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::Config;
+require_mods(qw(DBD::SQLite));
+require PublicInbox::SearchIdx;
+use_ok 'PublicInbox::InboxIdle';
+require_git('2.6');
+use PublicInbox::InboxWritable;
+my ($tmpdir, $for_destroy) = tmpdir();
+
+for my $V (1, 2) {
+	my $inboxdir = "$tmpdir/$V";
+	mkdir $inboxdir or BAIL_OUT("mkdir: $!");
+	my %opt = (
+		inboxdir => $inboxdir,
+		name => 'inbox-idle',
+		version => $V,
+		-primary_address => 'test@example.com',
+		indexlevel => 'basic',
+	);
+	my $ibx = PublicInbox::Inbox->new({ %opt });
+	$ibx = PublicInbox::InboxWritable->new($ibx);
+	my $obj = InboxIdleTestObj->new;
+	$ibx->init_inbox(0);
+	my $im = $ibx->importer(0);
+	if ($V == 1) {
+		my $sidx = PublicInbox::SearchIdx->new($ibx, 1);
+		$sidx->_xdb_acquire;
+		$sidx->set_indexlevel;
+		$sidx->_xdb_release; # allow watching on lockfile
+	}
+	my $pi_config = PublicInbox::Config->new(\<<EOF);
+publicinbox.inbox-idle.inboxdir=$inboxdir
+publicinbox.inbox-idle.indexlevel=basic
+publicinbox.inbox-idle.address=test\@example.com
+EOF
+	my $ident = 'whatever';
+	$pi_config->each_inbox(sub { shift->subscribe_unlock($ident, $obj) });
+	my $ii = PublicInbox::InboxIdle->new($pi_config);
+	ok($ii, 'InboxIdle created');
+	SKIP: {
+		skip('inotify or kqueue missing', 1) unless $ii->{sock};
+		ok(fileno($ii->{sock}) >= 0, 'fileno() gave valid FD');
+	}
+	ok($im->add(eml_load('t/utf8.eml')), "$V added");
+	$im->done;
+	PublicInbox::SearchIdx->new($ibx)->index_sync if $V == 1;
+	$ii->event_step;
+	is(scalar @{$obj->{called}}, 1, 'called on unlock');
+	$pi_config->each_inbox(sub { shift->unsubscribe_unlock($ident) });
+	ok($im->add(eml_load('t/data/0001.patch')), "$V added #2");
+	$im->done;
+	PublicInbox::SearchIdx->new($ibx)->index_sync if $V == 1;
+	$ii->event_step;
+	is(scalar @{$obj->{called}}, 1, 'not called when unsubbed');
+	$ii->close;
+}
+
+done_testing;
+
+package InboxIdleTestObj;
+use strict;
+
+sub new { bless {}, shift }
+
+sub on_inbox_unlock {
+	my ($self, $ibx) = @_;
+	push @{$self->{called}}, $ibx;
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] inboxidle: new class to detect inbox changes
  2020-05-21  7:15 [PATCH] inboxidle: new class to detect inbox changes Eric Wong
@ 2020-05-21  9:37 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2020-05-21  9:37 UTC (permalink / raw)
  To: meta

Eric Wong <e@yhbt.net> wrote:
> --- a/lib/PublicInbox/Inbox.pm

Naming is (still) hard.

> +# $obj must respond to >inbox_changed, which takes Inbox ($self) as an arg
                           ^^^^^^^^^^^^^
			   That should be ->on_inbox_unlock

> +sub subscribe_unlock {
> +	my ($self, $ident, $obj) = @_;
> +	$self->{over_subs}->{$ident} = $obj;
> +}
> +
> +sub unsubscribe_unlock {
> +	my ($self, $ident) = @_;
> +	delete $self->{over_subs}->{$ident};
> +}

{over_subs} might be a bad name, here.  I originally had this
watching over.sqlite3, but started watching the lock, instead.
I figure SQLite3 could (now or in the future) write via mmap
without waking up inotify.

And there may be users who want to serve v1 w/o SQLite...

> +
> +# called by inotify
> +sub on_unlock {
> +	my ($self) = @_;
> +	my $subs = $self->{over_subs} or return;
> +	for (values %$subs) {
> +		eval { $_->on_inbox_unlock($self) };
> +	}

Yes, ->on_inbox_unlock is the correct method right now, not
->inbox_changed

> diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm
> index 032841ed..5a55c9d3 100644
> --- a/lib/PublicInbox/Lock.pm
> +++ b/lib/PublicInbox/Lock.pm
> @@ -14,7 +14,7 @@ sub lock_acquire {
>  	my ($self) = @_;
>  	croak 'already locked' if $self->{lockfh};
>  	my $lock_path = $self->{lock_path} or return;
> -	sysopen(my $lockfh, $lock_path, O_WRONLY|O_CREAT) or
> +	sysopen(my $lockfh, $lock_path, O_TRUNC|O_WRONLY|O_CREAT) or

I don't think O_TRUNC is necessary here.  It bothers me, since it
causes useless I/O traffic and SSD write amplification,
especially on systems where inotify or kqueue are available.

> @@ -24,6 +24,11 @@ sub lock_release {
>  	my ($self) = @_;
>  	return unless $self->{lock_path};
>  	my $lockfh = delete $self->{lockfh} or croak 'not locked';
> +
> +	# NetBSD 8.1 and OpenBSD 6.5 (and maybe other versions/*BSDs) lack
> +	# NOTE_CLOSE_WRITE from FreeBSD 11+, so trigger NOTE_WRITE, instead
> +	syswrite($lockfh, '.') if $^O ne 'linux';

Yeah, relying on NOTE_WRITE there is kinda gross, but
NOTE_CLOSE_WRITE isn't available on all *BSDs.

> +
> +done_testing;
> +
> +package InboxIdleTestObj;
> +use strict;
> +
> +sub new { bless {}, shift }
> +
> +sub on_inbox_unlock {
> +	my ($self, $ibx) = @_;
> +	push @{$self->{called}}, $ibx;
> +}

Yup, we use ->on_inbox_unlock, not ->inbox_changed

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-21  7:15 [PATCH] inboxidle: new class to detect inbox changes Eric Wong
2020-05-21  9:37 ` Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git