From 53a8e32b97985803e9de12c4312a86a8850208b3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 16 May 2019 19:22:46 -0500 Subject: Config.pm: Add support for mailing list information The world has turned since I first started following mailing lists and to my surprise every mailing list that I am subscribed to properly sets the "List-ID:" mailing list header. So instead of doing something clever and flexible I am adding support for looking up public inbox mailing lists by their mailing list name. That makes the work needed for each email trivial and easy to understand. - Parse the "List-ID:" header. - Lookup in the configuration which mailbox is connected to that "List-ID:" - Deliver the mail to that mailbox. To that end this change enhances PublicInbox to have an additional mailbox configuration parameter "listid" that holds the mailing list name. A method is added to the PublicInbox config object called lookup_list_id that given a mailing list name will return the PublicInbox in the configuration that is configured to handle that mailing list. Signed-off-by: "Eric W. 
Biederman" [ew: avoid autovivification of $ibx->{listid} for t/config.t] --- lib/PublicInbox/Config.pm | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 4fcb20d2..c2fa40f9 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -25,6 +25,7 @@ sub new { # caches $self->{-by_addr} ||= {}; + $self->{-by_list_id} ||= {}; $self->{-by_name} ||= {}; $self->{-by_newsgroup} ||= {}; $self->{-no_obfuscate} ||= {}; @@ -84,6 +85,33 @@ sub lookup { _fill($self, $pfx); } +sub lookup_list_id { + my ($self, $list_id) = @_; + $list_id = lc($list_id); + my $ibx = $self->{-by_list_id}->{$list_id}; + return $ibx if $ibx; + + my $pfx; + + foreach my $k (keys %$self) { + $k =~ /\A(publicinbox\.[\w-]+)\.listid\z/ or next; + my $v = $self->{$k}; + if (ref($v) eq "ARRAY") { + foreach my $alias (@$v) { + (lc($alias) eq $list_id) or next; + $pfx = $1; + last; + } + } else { + (lc($v) eq $list_id) or next; + $pfx = $1; + last; + } + } + defined $pfx or return; + _fill($self, $pfx); +} + sub lookup_name ($$) { my ($self, $name) = @_; $self->{-by_name}->{$name} || _fill($self, "publicinbox.$name"); @@ -398,7 +426,7 @@ sub _fill { } # TODO: more arrays, we should support multi-value for # more things to encourage decentralization - foreach my $k (qw(address altid nntpmirror coderepo hide)) { + foreach my $k (qw(address altid nntpmirror coderepo hide listid)) { if (defined(my $v = $self->{"$pfx.$k"})) { $ibx->{$k} = _array($v); } @@ -421,6 +449,11 @@ sub _fill { $self->{-by_addr}->{$lc_addr} = $ibx; $self->{-no_obfuscate}->{$lc_addr} = 1; } + if (my $listids = $ibx->{listid}) { + foreach my $list_id (@$listids) { + $self->{-by_list_id}->{$list_id} = $ibx; + } + } if (my $ng = $ibx->{newsgroup}) { $self->{-by_newsgroup}->{$ng} = $ibx; } -- cgit v1.2.3-24-ge0c7 From 1317fb7b4ace03f6d9dfb1a42ee5f9371a1bf913 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 
00:38:06 +0000 Subject: config: we always have {-section_order} Rewrite a bunch of tests to use ordered input (emulating "git config -l" output) so we can always walk sections in the order they were given in the config file. --- lib/PublicInbox/Config.pm | 60 +++++++++++++++++++------------------- t/config.t | 73 ++++++++++++++++++++++++----------------------- t/config_limiter.t | 24 ++++++++-------- t/psgi_attach.t | 8 +++--- t/psgi_bad_mids.t | 10 +++---- t/psgi_mount.t | 8 +++--- t/psgi_multipart_not.t | 10 +++---- t/psgi_scan_all.t | 12 ++++---- t/psgi_search.t | 8 +++--- t/psgi_text.t | 8 +++--- t/psgi_v2.t | 10 +++---- t/watch_filter_rubylang.t | 20 ++++++------- t/watch_maildir.t | 14 ++++----- t/watch_maildir_v2.t | 28 +++++++++--------- 14 files changed, 148 insertions(+), 145 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index c2fa40f9..b7e03af3 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -20,9 +20,14 @@ sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] } sub new { my ($class, $file) = @_; $file = default_file() unless defined($file); - $file = ref $file ? $file : git_config_dump($file); - my $self = bless $file, $class; - + my $self; + if (ref($file) eq 'SCALAR') { # used by some tests + open my $fh, '<', $file or die; # PerlIO::scalar + $self = config_fh_parse($fh, "\n", '='); + } else { + $self = git_config_dump($file); + } + bless $self, $class; # caches $self->{-by_addr} ||= {}; $self->{-by_list_id} ||= {}; @@ -119,22 +124,12 @@ sub lookup_name ($$) { sub each_inbox { my ($self, $cb) = @_; - if (my $section_order = $self->{-section_order}) { - foreach my $section (@$section_order) { - next if $section !~ m!\Apublicinbox\.([^/]+)\z!; - $self->{"publicinbox.$1.mainrepo"} or next; - my $ibx = lookup_name($self, $1) or next; - $cb->($ibx); - } - } else { - my %seen; - foreach my $k (keys %$self) { - $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! 
or next; - next if $seen{$1}; - $seen{$1} = 1; - my $ibx = lookup_name($self, $1) or next; - $cb->($ibx); - } + # may auto-vivify if config file is non-existent: + foreach my $section (@{$self->{-section_order}}) { + next if $section !~ m!\Apublicinbox\.([^/]+)\z!; + $self->{"publicinbox.$1.mainrepo"} or next; + my $ibx = lookup_name($self, $1) or next; + $cb->($ibx); } } @@ -175,19 +170,14 @@ sub default_file { config_dir() . '/config'; } -sub git_config_dump { - my ($file) = @_; - my (%section_seen, @section_order); - return {} unless -e $file; - my @cmd = (qw/git config -z -l/, "--file=$file"); - my $cmd = join(' ', @cmd); - my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n"; +sub config_fh_parse ($$$) { + my ($fh, $rs, $fs) = @_; my %rv; - local $/ = "\0"; + my (%section_seen, @section_order); + local $/ = $rs; while (defined(my $line = <$fh>)) { chomp $line; - my ($k, $v) = split(/\n/, $line, 2); - + my ($k, $v) = split($fs, $line, 2); my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/); unless (defined $section_seen{$section}) { $section_seen{$section} = 1; @@ -205,12 +195,22 @@ sub git_config_dump { $rv{$k} = $v; } } - close $fh or die "failed to close ($cmd) pipe: $?"; $rv{-section_order} = \@section_order; \%rv; } +sub git_config_dump { + my ($file) = @_; + return {} unless -e $file; + my @cmd = (qw/git config -z -l/, "--file=$file"); + my $cmd = join(' ', @cmd); + my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n"; + my $rv = config_fh_parse($fh, "\0", "\n"); + close $fh or die "failed to close ($cmd) pipe: $?"; + $rv; +} + sub valid_inbox_name ($) { my ($name) = @_; diff --git a/t/config.t b/t/config.t index a3c74fa2..3b4b12b3 100644 --- a/t/config.t +++ b/t/config.t @@ -58,30 +58,33 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); { my $cfgpfx = "publicinbox.test"; my @altid = qw(serial:gmane:file=a serial:enamg:file=b); - my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => 'test@example.com', - 
"$cfgpfx.mainrepo" => '/path/to/non/existent', - "$cfgpfx.altid" => [ @altid ], - }); + my $config = PublicInbox::Config->new(\<lookup_name('test'); is_deeply($ibx->{altid}, [ @altid ]); } { my $pfx = "publicinbox.test"; - my %h = ( - "$pfx.address" => 'test@example.com', - "$pfx.mainrepo" => '/path/to/non/existent', - "publicinbox.nntpserver" => 'news.example.com', - ); - my %tmp = %h; - my $cfg = PublicInbox::Config->new(\%tmp); + my $str = <new(\$str); my $ibx = $cfg->lookup_name('test'); is($ibx->{nntpserver}, 'news.example.com', 'global NNTP server'); - delete $h{'publicinbox.nntpserver'}; - $h{"$pfx.nntpserver"} = 'news.alt.example.com'; - $cfg = PublicInbox::Config->new(\%h); + $str = <new(\$str); $ibx = $cfg->lookup_name('test'); is($ibx->{nntpserver}, 'news.alt.example.com','per-inbox NNTP server'); } @@ -90,17 +93,15 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); { my $pfx = "publicinbox.test"; my $pfx2 = "publicinbox.foo"; - my %h = ( - "$pfx.address" => 'test@example.com', - "$pfx.mainrepo" => '/path/to/non/existent', - "$pfx2.address" => 'foo@example.com', - "$pfx2.mainrepo" => '/path/to/foo', - lc("publicinbox.noObfuscate") => - 'public-inbox.org @example.com z@EXAMPLE.com', - "$pfx.obfuscate" => 'true', # :< - ); - my %tmp = %h; - my $cfg = PublicInbox::Config->new(\%tmp); + my $str = <new(\$str); my $ibx = $cfg->lookup_name('test'); my $re = $ibx->{-no_obfuscate_re}; like('meta@public-inbox.org', $re, @@ -174,16 +175,16 @@ for my $s (@valid) { { my $pfx1 = "publicinbox.test1"; my $pfx2 = "publicinbox.test2"; - my $h = { - "$pfx1.address" => 'test@example.com', - "$pfx1.mainrepo" => '/path/to/non/existent', - "$pfx2.address" => 'foo@example.com', - "$pfx2.mainrepo" => '/path/to/foo', - "$pfx1.coderepo" => 'project', - "$pfx2.coderepo" => 'project', - "coderepo.project.dir" => '/path/to/project.git', - }; - my $cfg = PublicInbox::Config->new($h); + my $str = <new(\$str); my $t1 = $cfg->lookup_name('test1'); my $t2 = 
$cfg->lookup_name('test2'); is($t1->{-repo_objs}->[0], $t2->{-repo_objs}->[0], diff --git a/t/config_limiter.t b/t/config_limiter.t index 9fafceae..c1fffecf 100644 --- a/t/config_limiter.t +++ b/t/config_limiter.t @@ -6,11 +6,11 @@ use Test::More; use PublicInbox::Config; my $cfgpfx = "publicinbox.test"; { - my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => 'test@example.com', - "$cfgpfx.mainrepo" => '/path/to/non/existent', - "$cfgpfx.httpbackendmax" => 12, - }); + my $config = PublicInbox::Config->new(\<lookup_name('test'); my $git = $ibx->git; my $old = "$git"; @@ -24,16 +24,16 @@ my $cfgpfx = "publicinbox.test"; } { - my $config = PublicInbox::Config->new({ - 'publicinboxlimiter.named.max' => 3, - "$cfgpfx.address" => 'test@example.com', - "$cfgpfx.mainrepo" => '/path/to/non/existent', - "$cfgpfx.httpbackendmax" => 'named', - }); + my $config = PublicInbox::Config->new(\<lookup_name('test'); my $git = $ibx->git; ok($git, 'got git object'); - my $old = "$git"; + my $old = "$git"; # stringify object ref "Git(0xDEADBEEF)" my $lim = $git->{-httpbackend_limiter}; ok($lim, 'Limiter exists'); is($lim->{max}, 3, 'limiter has expected slots'); diff --git a/t/psgi_attach.t b/t/psgi_attach.t index 41695e0d..f5140f44 100644 --- a/t/psgi_attach.t +++ b/t/psgi_attach.t @@ -21,10 +21,10 @@ use PublicInbox::Config; use PublicInbox::WWW; use_ok 'PublicInbox::WwwAttach'; use Plack::Builder; -my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => $addr, - "$cfgpfx.mainrepo" => $maindir, -}); +my $config = PublicInbox::Config->new(\<new($maindir); my $im = PublicInbox::Import->new($git, 'test', $addr); diff --git a/t/psgi_bad_mids.t b/t/psgi_bad_mids.t index c561cc36..95196a3f 100644 --- a/t/psgi_bad_mids.t +++ b/t/psgi_bad_mids.t @@ -53,11 +53,11 @@ Date: Fri, 02 Oct 1993 00:00:0$i +0000 } $im->done; -my $cfg = { - "$cfgpfx.address" => $ibx->{-primary_address}, - "$cfgpfx.mainrepo" => $mainrepo, -}; -my $config = PublicInbox::Config->new($cfg); +my $cfg = 
<{-primary_address} +$cfgpfx.mainrepo=$mainrepo +EOF +my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; diff --git a/t/psgi_mount.t b/t/psgi_mount.t index 8da2bc89..7160896b 100644 --- a/t/psgi_mount.t +++ b/t/psgi_mount.t @@ -21,10 +21,10 @@ use PublicInbox::Config; use PublicInbox::WWW; use Plack::Builder; use Plack::App::URLMap; -my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => $addr, - "$cfgpfx.mainrepo" => $maindir, -}); +my $config = PublicInbox::Config->new(\<new($maindir); my $im = PublicInbox::Import->new($git, 'test', $addr); diff --git a/t/psgi_multipart_not.t b/t/psgi_multipart_not.t index ae248de3..2670c47a 100644 --- a/t/psgi_multipart_not.t +++ b/t/psgi_multipart_not.t @@ -42,11 +42,11 @@ ok($im->add($mime), 'added broken multipart message'); $im->done; my $cfgpfx = "publicinbox.v2test"; -my $cfg = { - "$cfgpfx.address" => $ibx->{-primary_address}, - "$cfgpfx.mainrepo" => $repo, -}; -my $config = PublicInbox::Config->new($cfg); +my $cfg = <{-primary_address} +$cfgpfx.mainrepo=$repo +EOF +my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); my ($res, $raw); diff --git a/t/psgi_scan_all.t b/t/psgi_scan_all.t index 2f54c820..2e00b6d8 100644 --- a/t/psgi_scan_all.t +++ b/t/psgi_scan_all.t @@ -14,13 +14,15 @@ foreach my $mod (@mods) { use_ok 'PublicInbox::V2Writable'; foreach my $mod (@mods) { use_ok $mod; } my $tmp = tempdir('pi-scan_all-XXXXXX', TMPDIR => 1, CLEANUP => 1); -my $cfg = {}; +my $cfg = ''; foreach my $i (1..2) { my $cfgpfx = "publicinbox.test-$i"; - my $addr = $cfg->{"$cfgpfx.address"} = "test-$i\@example.com"; - my $mainrepo = $cfg->{"$cfgpfx.mainrepo"} = "$tmp/$i"; - $cfg->{"$cfgpfx.url"} = "http://example.com/$i"; + my $addr = "test-$i\@example.com"; + my $mainrepo = "$tmp/$i"; + $cfg .= "$cfgpfx.address=$addr\n"; + $cfg .= "$cfgpfx.mainrepo=$mainrepo\n"; + $cfg .= 
"$cfgpfx.url=http://example.com/$i\n"; my $opt = { mainrepo => $mainrepo, name => "test-$i", @@ -45,7 +47,7 @@ EOF ok($im->add($mime), "added message to $i"); $im->done; } -my $config = PublicInbox::Config->new($cfg); +my $config = PublicInbox::Config->new(\$cfg); use_ok 'PublicInbox::WWW'; my $www = PublicInbox::WWW->new($config); diff --git a/t/psgi_search.t b/t/psgi_search.t index 0c4bdcd1..ab6892bc 100644 --- a/t/psgi_search.t +++ b/t/psgi_search.t @@ -45,10 +45,10 @@ $im->done; PublicInbox::SearchIdx->new($ibx, 1)->index_sync; my $cfgpfx = "publicinbox.test"; -my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => 'git@vger.kernel.org', - "$cfgpfx.mainrepo" => $tmpdir, -}); +my $config = PublicInbox::Config->new(\<new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; diff --git a/t/psgi_text.t b/t/psgi_text.t index bdc1ebfd..944a6476 100644 --- a/t/psgi_text.t +++ b/t/psgi_text.t @@ -21,10 +21,10 @@ use PublicInbox::Config; use PublicInbox::WWW; use_ok 'PublicInbox::WwwText'; use Plack::Builder; -my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => $addr, - "$cfgpfx.mainrepo" => $maindir, -}); +my $config = PublicInbox::Config->new(\<new($config); diff --git a/t/psgi_v2.t b/t/psgi_v2.t index 3003c5d6..e4f7306e 100644 --- a/t/psgi_v2.t +++ b/t/psgi_v2.t @@ -54,11 +54,11 @@ $new_mid = $mids->[1]; $im->done; my $cfgpfx = "publicinbox.v2test"; -my $cfg = { - "$cfgpfx.address" => $ibx->{-primary_address}, - "$cfgpfx.mainrepo" => $mainrepo, -}; -my $config = PublicInbox::Config->new($cfg); +my $cfg = <{-primary_address} +$cfgpfx.mainrepo=$mainrepo +EOF +my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); my ($res, $raw, @from_); test_psgi(sub { $www->call(@_) }, sub { diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t index da383c15..b28d699a 100644 --- a/t/watch_filter_rubylang.t +++ b/t/watch_filter_rubylang.t @@ -70,15 +70,15 @@ spam EOF 
PublicInbox::Emergency->new($maildir)->prepare(\"$spam"); - my %orig = ( - "$cfgpfx.address" => $addr, - "$cfgpfx.mainrepo" => $mainrepo, - "$cfgpfx.watch" => "maildir:$maildir", - "$cfgpfx.filter" => 'PublicInbox::Filter::RubyLang', - "$cfgpfx.altid" => 'serial:alerts:file=msgmap.sqlite3', - "publicinboxwatch.watchspam" => "maildir:$spamdir", - ); - my $config = PublicInbox::Config->new({%orig}); + my $orig = <new(\$orig); my $ibx = $config->lookup_name($v); ok($ibx, 'found inbox by name'); @@ -108,7 +108,7 @@ EOF } $w->scan('full'); - $config = PublicInbox::Config->new({%orig}); + $config = PublicInbox::Config->new(\$orig); $ibx = $config->lookup_name($v); ($tot, undef) = $ibx->search->reopen->query('b:spam'); is($tot, 0, 'spam removed'); diff --git a/t/watch_maildir.t b/t/watch_maildir.t index d164bf35..e65ab9a9 100644 --- a/t/watch_maildir.t +++ b/t/watch_maildir.t @@ -35,13 +35,13 @@ ok(POSIX::mkfifo("$maildir/cur/fifo", 0777), 'create FIFO to ensure we do not get stuck on it :P'); my $sem = PublicInbox::Emergency->new($spamdir); # create dirs -my $config = PublicInbox::Config->new({ - "$cfgpfx.address" => $addr, - "$cfgpfx.mainrepo" => $git_dir, - "$cfgpfx.watch" => "maildir:$maildir", - "$cfgpfx.filter" => 'PublicInbox::Filter::Vger', - "publicinboxlearn.watchspam" => "maildir:$spamdir", -}); +my $config = PublicInbox::Config->new(\<new($config)->scan('full'); my $git = PublicInbox::Git->new($git_dir); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index f1d6e740..0a5a8017 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -40,14 +40,14 @@ ok(POSIX::mkfifo("$maildir/cur/fifo", 0777), 'create FIFO to ensure we do not get stuck on it :P'); my $sem = PublicInbox::Emergency->new($spamdir); # create dirs -my %orig = ( - "$cfgpfx.address" => $addr, - "$cfgpfx.mainrepo" => $mainrepo, - "$cfgpfx.watch" => "maildir:$maildir", - "$cfgpfx.filter" => 'PublicInbox::Filter::Vger', - "publicinboxlearn.watchspam" => "maildir:$spamdir" -); -my $config 
= PublicInbox::Config->new({%orig}); +my $orig = <new(\$orig); my $ibx = $config->lookup_name('test'); ok($ibx, 'found inbox by name'); my $srch = $ibx->search; @@ -146,12 +146,12 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); my $v1pfx = "publicinbox.v1"; my $v1addr = 'v1-public@example.com'; is(system(qw(git init -q --bare), $v1repo), 0, 'v1 init OK'); - my $config = PublicInbox::Config->new({ - %orig, - "$v1pfx.address" => $v1addr, - "$v1pfx.mainrepo" => $v1repo, - "$v1pfx.watch" => "maildir:$maildir", - }); + my $cfg2 = <new(\$cfg2); my $both = < Date: Tue, 15 Oct 2019 01:11:58 +0000 Subject: config: simplify lookup* methods This ensures we always process inboxes in section order and reduces the amount of code we have to maintain for each lookup. Avoiding the cost of inboxes object creation is not worth the code overhead; and we can implement a config cache via Storable easily for large configs and -mda users. --- lib/PublicInbox/Config.pm | 75 +++++++++-------------------------------------- 1 file changed, 14 insertions(+), 61 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index b7e03af3..2b99346a 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -63,58 +63,24 @@ sub new { $self; } +sub _fill_all ($) { each_inbox($_[0], sub {}) } + +sub _lookup_fill ($$$) { + my ($self, $cache, $key) = @_; + $self->{$cache}->{$key} // do { + _fill_all($self); + $self->{$cache}->{$key}; + } +} + sub lookup { my ($self, $recipient) = @_; - my $addr = lc($recipient); - my $ibx = $self->{-by_addr}->{$addr}; - return $ibx if $ibx; - - my $pfx; - - foreach my $k (keys %$self) { - $k =~ m!\A(publicinbox\.[^/]+)\.address\z! 
or next; - my $v = $self->{$k}; - if (ref($v) eq "ARRAY") { - foreach my $alias (@$v) { - (lc($alias) eq $addr) or next; - $pfx = $1; - last; - } - } else { - (lc($v) eq $addr) or next; - $pfx = $1; - last; - } - } - defined $pfx or return; - _fill($self, $pfx); + _lookup_fill($self, '-by_addr', lc($recipient)); } sub lookup_list_id { my ($self, $list_id) = @_; - $list_id = lc($list_id); - my $ibx = $self->{-by_list_id}->{$list_id}; - return $ibx if $ibx; - - my $pfx; - - foreach my $k (keys %$self) { - $k =~ /\A(publicinbox\.[\w-]+)\.listid\z/ or next; - my $v = $self->{$k}; - if (ref($v) eq "ARRAY") { - foreach my $alias (@$v) { - (lc($alias) eq $list_id) or next; - $pfx = $1; - last; - } - } else { - (lc($v) eq $list_id) or next; - $pfx = $1; - last; - } - } - defined $pfx or return; - _fill($self, $pfx); + _lookup_fill($self, '-by_list_id', lc($list_id)); } sub lookup_name ($$) { @@ -135,20 +101,7 @@ sub each_inbox { sub lookup_newsgroup { my ($self, $ng) = @_; - $ng = lc($ng); - my $ibx = $self->{-by_newsgroup}->{$ng}; - return $ibx if $ibx; - - foreach my $k (keys %$self) { - $k =~ m!\A(publicinbox\.[^/]+)\.newsgroup\z! or next; - my $v = $self->{$k}; - my $pfx = $1; - if ($v eq $ng) { - $ibx = _fill($self, $pfx); - return $ibx; - } - } - undef; + _lookup_fill($self, '-by_newsgroup', lc($ng)); } sub limiter { @@ -461,7 +414,7 @@ sub _fill { if ($ibx->{obfuscate}) { $ibx->{-no_obfuscate} = $self->{-no_obfuscate}; $ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re}; - each_inbox($self, sub {}); # noop to populate -no_obfuscate + _fill_all($self); # noop to populate -no_obfuscate } if (my $ibx_code_repos = $ibx->{coderepo}) { -- cgit v1.2.3-24-ge0c7 From fe3d294c237640bebf5d047c92921287a20d4485 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 01:18:54 +0000 Subject: config: avoid unnecessary '||' use '//' is available in Perl 5.10+ which allows `0' and `""' (empty string) to remain unclobbered. 
We also don't need '||=' for initializing our internal caches. --- lib/PublicInbox/Config.pm | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 2b99346a..e0329ebf 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -29,13 +29,13 @@ sub new { } bless $self, $class; # caches - $self->{-by_addr} ||= {}; - $self->{-by_list_id} ||= {}; - $self->{-by_name} ||= {}; - $self->{-by_newsgroup} ||= {}; - $self->{-no_obfuscate} ||= {}; - $self->{-limiters} ||= {}; - $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object + $self->{-by_addr} = {}; + $self->{-by_list_id} = {}; + $self->{-by_name} = {}; + $self->{-by_newsgroup} = {}; + $self->{-no_obfuscate} = {}; + $self->{-limiters} = {}; + $self->{-code_repos} = {}; # nick => PublicInbox::Git object $self->{-cgitrc_unparsed} = $self->{'publicinbox.cgitrc'}; if (my $no = delete $self->{'publicinbox.noobfuscate'}) { @@ -85,7 +85,7 @@ sub lookup_list_id { sub lookup_name ($$) { my ($self, $name) = @_; - $self->{-by_name}->{$name} || _fill($self, "publicinbox.$name"); + $self->{-by_name}->{$name} // _fill($self, "publicinbox.$name"); } sub each_inbox { @@ -106,7 +106,7 @@ sub lookup_newsgroup { sub limiter { my ($self, $name) = @_; - $self->{-limiters}->{$name} ||= do { + $self->{-limiters}->{$name} //= do { require PublicInbox::Qspawn; my $max = $self->{"publicinboxlimiter.$name.max"} || 1; my $limiter = PublicInbox::Qspawn::Limiter->new($max); @@ -115,7 +115,7 @@ sub limiter { }; } -sub config_dir { $ENV{PI_DIR} || "$ENV{HOME}/.public-inbox" } +sub config_dir { $ENV{PI_DIR} // "$ENV{HOME}/.public-inbox" } sub default_file { my $f = $ENV{PI_CONFIG}; @@ -206,8 +206,8 @@ sub cgit_repo_merge ($$$) { $self->{-cgit_remove_suffix} and $rel =~ s!/?\.git\z!!; } - $self->{"coderepo.$rel.dir"} ||= $path; - $self->{"coderepo.$rel.cgiturl"} ||= $rel; + $self->{"coderepo.$rel.dir"} //= $path; + 
$self->{"coderepo.$rel.cgiturl"} //= $rel; } sub is_git_dir ($) { @@ -338,7 +338,7 @@ sub _fill_code_repo { # cgit supports "/blob/?id=%s", but it's only a plain-text # display and requires an unabbreviated id= foreach my $t (qw(blob commit tag)) { - $git->{$t.'_url_format'} ||= map { + $git->{$t.'_url_format'} //= map { "$_/$t/?id=%s" } @$cgits; } @@ -426,7 +426,7 @@ sub _fill { $valid += valid_inbox_name($_) foreach (@parts); $valid == scalar(@parts) or next; - my $repo = $code_repos->{$nick} ||= + my $repo = $code_repos->{$nick} //= _fill_code_repo($self, $nick); push @$repo_objs, $repo if $repo; } -- cgit v1.2.3-24-ge0c7 From 849f57851a04e376cf2327d9e98e8128bf3c756e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 01:38:25 +0000 Subject: config: allow "0" as a valid mainrepo path It's probably wrong to use relative path names, but things are all relative these days anyways with shared and networked FSes. --- lib/PublicInbox/Config.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index e0329ebf..509de0a0 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -93,7 +93,7 @@ sub each_inbox { # may auto-vivify if config file is non-existent: foreach my $section (@{$self->{-section_order}}) { next if $section !~ m!\Apublicinbox\.([^/]+)\z!; - $self->{"publicinbox.$1.mainrepo"} or next; + defined($self->{"publicinbox.$1.mainrepo"}) or next; my $ibx = lookup_name($self, $1) or next; $cb->($ibx); } -- cgit v1.2.3-24-ge0c7 From 7e881ac8227d1882c92de6f6701ffcba7cef9191 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 03:40:19 +0000 Subject: mda, watch: wire up List-ID header support This also adds watchheader tests for -watch, which we never had before :x --- Documentation/public-inbox-config.pod | 18 ++++++++++++++++++ Documentation/public-inbox-mda.pod | 3 +++ lib/PublicInbox/WatchMaildir.pm | 31 ++++++++++++++++++++++++------- 
script/public-inbox-mda | 17 ++++++++++++++--- t/mda.t | 28 ++++++++++++++++++++++++++++ t/watch_maildir_v2.t | 31 +++++++++++++++++++++++++++++++ 6 files changed, 118 insertions(+), 10 deletions(-) diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index 8d545f7a..6a9739f7 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -85,6 +85,24 @@ the given header. Multiple values are not currently supported. Default: none; only for L<public-inbox-watch(1)> users +=item publicinbox.<name>.listid + +The L<rfc2919|https://tools.ietf.org/html/rfc2919> header without +angle brackets for L<public-inbox-mda(1)> deliveries and +L<public-inbox-watch(1)>. + +For public-inbox-watch users, this is a shortcut for specifying +C<publicinbox.$NAME.watchheader=List-Id:E<lt>foo.example.comE<gt>> + +For public-inbox-mda users, this may be used to avoid recipient +matching via C<ORIGINAL_RECIPIENT> environment variable. + +This may be specified multiple times for merging multiple mailing +lists into a single public-inbox, only one C<List-Id> header +needs to match. + +Default: none + =item publicinbox.<name>.nntpmirror This may be the full NNTP URL of an independently-run mirror. diff --git a/Documentation/public-inbox-mda.pod b/Documentation/public-inbox-mda.pod index 64ec690c..921b7a15 100644 --- a/Documentation/public-inbox-mda.pod +++ b/Documentation/public-inbox-mda.pod @@ -25,6 +25,9 @@ L The original recipient email address, set by the MTA. Postfix sets it by default, untested on other MTAs. +This does not have to be set if relying on C<publicinbox.$NAME.listid> +directives configured in L<public-inbox-config(5)>. + =item PI_CONFIG Per-user config file parseable by L<git-config(1)>.
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index f63140c8..08b1aab4 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -59,9 +59,19 @@ sub new { my $watch = $ibx->{watch} or return; if (is_maildir($watch)) { - if (my $wm = $ibx->{watchheader}) { - my ($k, $v) = split(/:/, $wm, 2); - $ibx->{-watchheader} = [ $k, qr/\Q$v\E/ ]; + my $watch_hdrs = []; + if (my $wh = $ibx->{watchheader}) { + my ($k, $v) = split(/:/, $wh, 2); + push @$watch_hdrs, [ $k, qr/\Q$v\E/ ]; + } + if (my $list_ids = $ibx->{listid}) { + for (@$list_ids) { + my $re = qr/<[ \t]*\Q$_\E[ \t]*>/; + push @$watch_hdrs, ['List-Id', $re ]; + } + } + if (scalar @$watch_hdrs) { + $ibx->{-watchheaders} = $watch_hdrs; } my $new = "$watch/new"; my $cur = "$watch/cur"; @@ -159,10 +169,17 @@ sub _try_path { my $mime = _path_to_mime($path) or next; my $im = _importer_for($self, $ibx); - my $wm = $ibx->{-watchheader}; - if ($wm) { - my $v = $mime->header_obj->header_raw($wm->[0]); - next unless ($v && $v =~ $wm->[1]); + # any header match means it's eligible for the inbox: + if (my $watch_hdrs = $ibx->{-watchheaders}) { + my $ok; + my $hdr = $mime->header_obj; + for my $wh (@$watch_hdrs) { + my $v = $hdr->header_raw($wh->[0]); + next unless defined($v) && $v =~ $wh->[1]; + $ok = 1; + last; + } + next unless $ok; } if (my $scrub = $ibx->filter($im)) { diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 4e6e04e2..2655a6c5 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -36,10 +36,21 @@ my $config = PublicInbox::Config->new; my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::get($config, $key, $default); +my $dst; my $recipient = $ENV{ORIGINAL_RECIPIENT}; -defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n"; -my $dst = $config->lookup($recipient); # first check -defined $dst or do_exit(67); # EX_NOUSER 5.1.1 user unknown +if 
(defined $recipient) { + $dst = $config->lookup($recipient); # first check +} +if (!defined $dst) { + my $list_id = $simple->header('List-Id'); + if (defined $list_id && $list_id =~ /<[ \t]*(.+)?[ \t]*>/) { + $dst = $config->lookup_list_id($1); + } + if (!defined $dst && !defined $recipient) { + die "ORIGINAL_RECIPIENT not defined in ENV\n"; + } + defined $dst or do_exit(67); # EX_NOUSER 5.1.1 user unknown +} $dst->{mainrepo} or do_exit(67); $dst = PublicInbox::InboxWritable->new($dst); diff --git a/t/mda.t b/t/mda.t index 5621b7d6..3cab590b 100644 --- a/t/mda.t +++ b/t/mda.t @@ -267,6 +267,34 @@ EOF } } +# List-ID based delivery +{ + local $ENV{PI_EMERGENCY} = $faildir; + local $ENV{HOME} = $home; + local $ENV{ORIGINAL_RECIPIENT} = undef; + local $ENV{PATH} = $main_path; + my $list_id = 'foo.example.com'; + my $mid = 'list-id-delivery@example.com'; + my $simple = Email::Simple->new(< +To: You +Cc: $addr +Message-ID: <$mid> +List-Id: <$list_id> +Subject: this message will be trained as spam +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +EOF + system(qw(git config --file), $pi_config, "$cfgpfx.listid", $list_id); + $? 
== 0 or die "failed to set listid $?"; + my $in = $simple->as_string; + IPC::Run::run([$mda], \$in); + is($?, 0, 'mda OK with List-Id match'); + my $path = mid2path($mid); + my $msg = `git --git-dir=$maindir cat-file blob HEAD:$path`; + like($msg, qr/\Q$list_id\E/, 'delivered message w/ List-ID matches'); +} + done_testing(); sub fail_bad_header { diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index 0a5a8017..99551ceb 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -171,4 +171,35 @@ EOF is($both, $$msg, 'got original message back from v2'); } +{ + my $want = <<'EOF'; +From: +List-Id: +Message-ID: +EOF + my $do_not_want = <<'EOF'; +From: +List-Id: +X-Mailing-List: no@example.com +Message-ID: +EOF + my $cfg = $orig."$cfgpfx.listid=i.want.you.to.want.me\n"; + PublicInbox::Emergency->new($maildir)->prepare(\$want); + PublicInbox::Emergency->new($maildir)->prepare(\$do_not_want); + my $config = PublicInbox::Config->new(\$cfg); + PublicInbox::WatchMaildir->new($config)->scan('full'); + $ibx = $config->lookup_name('test'); + my $num = $ibx->mm->num_for('do.want@example.com'); + ok(defined $num, 'List-ID matched for watch'); + $num = $ibx->mm->num_for('do.not.want@example.com'); + is($num, undef, 'unaccepted List-ID matched for watch'); + + $cfg = $orig."$cfgpfx.watchheader=X-Mailing-List:no\@example.com\n"; + $config = PublicInbox::Config->new(\$cfg); + PublicInbox::WatchMaildir->new($config)->scan('full'); + $ibx = $config->lookup_name('test'); + $num = $ibx->mm->num_for('do.not.want@example.com'); + ok(defined $num, 'X-Mailing-List matched'); +} + done_testing; -- cgit v1.2.3-24-ge0c7 From a7603ca1d219f39101f8e35bdea1f53ce5c31796 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 06:39:07 +0000 Subject: wwwtext: show listid config directive(s) We want to share this piece for potential mirror-ers just like watchheader. 
--- lib/PublicInbox/WwwText.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 491f7e63..bcda665c 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -147,7 +147,7 @@ sub inbox_config ($$$) { [publicinbox "$name"] mainrepo = /path/to/top-level-inbox EOS - for my $k (qw(address)) { + for my $k (qw(address listid)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } -- cgit v1.2.3-24-ge0c7