From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A02552018A for ; Fri, 17 Jun 2016 00:41:31 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] watch: introduce watch directive Date: Fri, 17 Jun 2016 00:41:28 +0000 Message-Id: <20160617004128.1037-3-e@80x24.org> In-Reply-To: <20160617004128.1037-1-e@80x24.org> References: <20160617004128.1037-1-e@80x24.org> List-Id: This will allow users to run importers off existing mail accounts where they may not have access to run -mda. Currently, we only support Maildirs, but IMAP ought to be doable. --- MANIFEST | 2 + lib/PublicInbox/Config.pm | 3 +- lib/PublicInbox/WatchMaildir.pm | 141 ++++++++++++++++++++++++++++++++++++++++ script/public-inbox-watch | 16 +++++ 4 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 lib/PublicInbox/WatchMaildir.pm create mode 100755 script/public-inbox-watch diff --git a/MANIFEST b/MANIFEST index fdb92e0..9c8cc1c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -74,6 +74,7 @@ lib/PublicInbox/Thread.pm lib/PublicInbox/Unsubscribe.pm lib/PublicInbox/View.pm lib/PublicInbox/WWW.pm +lib/PublicInbox/WatchMaildir.pm lib/PublicInbox/WwwAttach.pm sa_config/Makefile sa_config/README @@ -85,6 +86,7 @@ script/public-inbox-init script/public-inbox-learn script/public-inbox-mda script/public-inbox-nntpd +script/public-inbox-watch script/public-inbox.cgi scripts/dc-dlvr scripts/dc-dlvr.pre diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 4651861..43ffba7 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -120,7 +120,8 @@ sub _fill { my ($self, $pfx) = @_; my $rv = {}; - foreach my $k (qw(mainrepo address filter url newsgroup)) { + foreach my $k (qw(mainrepo address filter url newsgroup + watch watchheader)) { my $v = $self->{"$pfx.$k"}; $rv->{$k} = $v if defined $v; } diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm new file mode 100644 index 0000000..b23556a --- /dev/null +++ b/lib/PublicInbox/WatchMaildir.pm @@ -0,0 +1,141 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +package PublicInbox::WatchMaildir; +use strict; +use warnings; +use Email::MIME; +use Email::MIME::ContentType; +$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect +use PublicInbox::Git; +use PublicInbox::Import; +use PublicInbox::MDA; + +sub new { + my ($class, $config) = @_; + my (%mdmap, @mdir); + foreach my $k (keys %$config) { + $k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next; + my $name = $1; + my $watch = $config->{$k}; + if ($watch =~ s/\Amaildir://) { + $watch =~ s!/+\z!!; + my $inbox = $config->lookup_name($name); + if (my $wm = $inbox->{watchheader}) { + my ($k, $v) = split(/:/, $wm, 2); + $inbox->{-watchheader} = [ $k, qr/\Q$v\E/ ]; + } + my $new = "$watch/new"; + my $cur = "$watch/cur"; + push @mdir, $new, $cur; + $mdmap{$new} = $inbox; + $mdmap{$cur} = $inbox; + } else { + warn "watch unsupported: $k=$watch\n"; + } + } + return unless @mdir; + + my $mdre = join('|', map { quotemeta($_) } @mdir); + $mdre = qr!\A($mdre)/!; + bless { + mdmap => \%mdmap, + mdir => \@mdir, + mdre => $mdre, + importers => {}, + }, $class; +} + +sub _try_fsn_paths { + my ($self, $paths) = @_; + _try_path($self, $_->{path}) foreach @$paths; + $_->done foreach values %{$self->{importers}}; +} + +sub _try_path { + my ($self, $path) = @_; + if ($path !~ $self->{mdre}) { + warn "unrecognized path: $path\n"; + return; + } + my $inbox = $self->{mdmap}->{$1}; + unless ($inbox) { + warn "unmappable dir: $1\n"; + return; + } + my $im = $inbox->{-import} ||= eval { + my $git = $inbox->git; + my $name = $inbox->{name}; + my $addr = $inbox->{-primary_address}; + PublicInbox::Import->new($git, $name, $addr); + }; + $self->{importers}->{"$im"} = $im; + my $mime; + if (open my $fh, '<', $path) { + local $/; + my $str = <$fh>; + $str or return; + $mime = Email::MIME->new(\$str); + } elsif ($!{ENOENT}) { + return; + } else { + warn "failed to open $path: $!\n"; + return; + } + + $mime->header_set($_) foreach @PublicInbox::MDA::BAD_HEADERS; + my $wm = $inbox->{-watchheader}; + if ($wm) { + my $v = $mime->header_obj->header_raw($wm->[0]); + unless ($v && $v =~ $wm->[1]) { + warn "$wm->[0] failed to match $wm->[1]\n"; + return; + } + } + my $f = $inbox->{filter}; + if ($f && $f =~ /::/) { + eval "require $f"; + if ($@) { + warn $@; + } else { + $f = $f->new; + $mime = $f->scrub($mime); + } + } + $mime or return; + my $mid = $mime->header_obj->header_raw('Message-Id'); + $im->add($mime); +} + +sub watch { + my ($self) = @_; + my $cb = sub { _try_fsn_paths($self, \@_) }; + my $mdir = $self->{mdir}; + + require Filesys::Notify::Simple; + my $watcher = Filesys::Notify::Simple->new($mdir); + $watcher->wait($cb) while (1); +} + +sub scan { + my ($self) = @_; + my $mdir = $self->{mdir}; + foreach my $dir (@$mdir) { + my $ok = opendir(my $dh, $dir); + unless ($ok) { + warn "failed to open $dir: $!\n"; + next; + } + while (my $fn = readdir($dh)) { + next unless $fn =~ /\A[a-zA-Z0-9][\w:,=\.]+\z/; + $fn = "$dir/$fn"; + if (-f $fn) { + _try_path($self, $fn); + } else { + warn "not a file: $fn\n"; + } + } + closedir $dh; + } +} + +1; diff --git a/script/public-inbox-watch b/script/public-inbox-watch new file mode 100755 index 0000000..42ae55a --- /dev/null +++ b/script/public-inbox-watch @@ -0,0 +1,16 @@ +#!/usr/bin/perl -w +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use PublicInbox::WatchMaildir; +use PublicInbox::Config; +my $config = PublicInbox::Config->new; +my $watch_md = PublicInbox::WatchMaildir->new($config); +if ($watch_md) { + my $scan = sub { $watch_md->scan }; + $SIG{USR1} = $scan; + $SIG{ALRM} = sub { $SIG{ALRM} = 'DEFAULT'; $scan->() }; + alarm(1); + $watch_md->watch; +}