From 53a8e32b97985803e9de12c4312a86a8850208b3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 16 May 2019 19:22:46 -0500 Subject: Config.pm: Add support for mailing list information The world has turned since I first started following mailing lists and to my surprise every mailing list that I am subscribed to properly sets the "List-ID:" mailing list header. So instead of doing something clever and flexible I am adding support for looking up public inbox mailing lists by their mailing list name. That makes the work needed for each email trivial and easy to understand. - Parse the "List-ID:" header. - Look up in the configuration which mailbox is connected to that "List-ID:". - Deliver the mail to that mailbox. To that end this change enhances PublicInbox to have an additional mailbox configuration parameter "listid" that holds the mailing list name. A method is added to the PublicInbox config object called lookup_list_id that, given a mailing list name, returns the PublicInbox in the configuration that is configured to handle that mailing list. Signed-off-by: "Eric W. 
Biederman" [ew: avoid autovivification of $ibx->{listid} for t/config.t] --- lib/PublicInbox/Config.pm | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 4fcb20d2..c2fa40f9 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -25,6 +25,7 @@ sub new { # caches $self->{-by_addr} ||= {}; + $self->{-by_list_id} ||= {}; $self->{-by_name} ||= {}; $self->{-by_newsgroup} ||= {}; $self->{-no_obfuscate} ||= {}; @@ -84,6 +85,33 @@ sub lookup { _fill($self, $pfx); } +sub lookup_list_id { + my ($self, $list_id) = @_; + $list_id = lc($list_id); + my $ibx = $self->{-by_list_id}->{$list_id}; + return $ibx if $ibx; + + my $pfx; + + foreach my $k (keys %$self) { + $k =~ /\A(publicinbox\.[\w-]+)\.listid\z/ or next; + my $v = $self->{$k}; + if (ref($v) eq "ARRAY") { + foreach my $alias (@$v) { + (lc($alias) eq $list_id) or next; + $pfx = $1; + last; + } + } else { + (lc($v) eq $list_id) or next; + $pfx = $1; + last; + } + } + defined $pfx or return; + _fill($self, $pfx); +} + sub lookup_name ($$) { my ($self, $name) = @_; $self->{-by_name}->{$name} || _fill($self, "publicinbox.$name"); @@ -398,7 +426,7 @@ sub _fill { } # TODO: more arrays, we should support multi-value for # more things to encourage decentralization - foreach my $k (qw(address altid nntpmirror coderepo hide)) { + foreach my $k (qw(address altid nntpmirror coderepo hide listid)) { if (defined(my $v = $self->{"$pfx.$k"})) { $ibx->{$k} = _array($v); } @@ -421,6 +449,11 @@ sub _fill { $self->{-by_addr}->{$lc_addr} = $ibx; $self->{-no_obfuscate}->{$lc_addr} = 1; } + if (my $listids = $ibx->{listid}) { + foreach my $list_id (@$listids) { + $self->{-by_list_id}->{$list_id} = $ibx; + } + } if (my $ng = $ibx->{newsgroup}) { $self->{-by_newsgroup}->{$ng} = $ibx; } -- cgit v1.2.3-24-ge0c7 From 1317fb7b4ace03f6d9dfb1a42ee5f9371a1bf913 Mon Sep 17 00:00:00 2001 From: Eric 
Wong Date: Tue, 15 Oct 2019 00:38:06 +0000 Subject: config: we always have {-section_order} Rewrite a bunch of tests to use ordered input (emulating "git config -l" output) so we can always walk sections in the order they were given in the config file. --- lib/PublicInbox/Config.pm | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index c2fa40f9..b7e03af3 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -20,9 +20,14 @@ sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] } sub new { my ($class, $file) = @_; $file = default_file() unless defined($file); - $file = ref $file ? $file : git_config_dump($file); - my $self = bless $file, $class; - + my $self; + if (ref($file) eq 'SCALAR') { # used by some tests + open my $fh, '<', $file or die; # PerlIO::scalar + $self = config_fh_parse($fh, "\n", '='); + } else { + $self = git_config_dump($file); + } + bless $self, $class; # caches $self->{-by_addr} ||= {}; $self->{-by_list_id} ||= {}; @@ -119,22 +124,12 @@ sub lookup_name ($$) { sub each_inbox { my ($self, $cb) = @_; - if (my $section_order = $self->{-section_order}) { - foreach my $section (@$section_order) { - next if $section !~ m!\Apublicinbox\.([^/]+)\z!; - $self->{"publicinbox.$1.mainrepo"} or next; - my $ibx = lookup_name($self, $1) or next; - $cb->($ibx); - } - } else { - my %seen; - foreach my $k (keys %$self) { - $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next; - next if $seen{$1}; - $seen{$1} = 1; - my $ibx = lookup_name($self, $1) or next; - $cb->($ibx); - } + # may auto-vivify if config file is non-existent: + foreach my $section (@{$self->{-section_order}}) { + next if $section !~ m!\Apublicinbox\.([^/]+)\z!; + $self->{"publicinbox.$1.mainrepo"} or next; + my $ibx = lookup_name($self, $1) or next; + $cb->($ibx); } } @@ -175,19 +170,14 @@ sub default_file { config_dir() . 
'/config'; } -sub git_config_dump { - my ($file) = @_; - my (%section_seen, @section_order); - return {} unless -e $file; - my @cmd = (qw/git config -z -l/, "--file=$file"); - my $cmd = join(' ', @cmd); - my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n"; +sub config_fh_parse ($$$) { + my ($fh, $rs, $fs) = @_; my %rv; - local $/ = "\0"; + my (%section_seen, @section_order); + local $/ = $rs; while (defined(my $line = <$fh>)) { chomp $line; - my ($k, $v) = split(/\n/, $line, 2); - + my ($k, $v) = split($fs, $line, 2); my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/); unless (defined $section_seen{$section}) { $section_seen{$section} = 1; @@ -205,12 +195,22 @@ sub git_config_dump { $rv{$k} = $v; } } - close $fh or die "failed to close ($cmd) pipe: $?"; $rv{-section_order} = \@section_order; \%rv; } +sub git_config_dump { + my ($file) = @_; + return {} unless -e $file; + my @cmd = (qw/git config -z -l/, "--file=$file"); + my $cmd = join(' ', @cmd); + my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n"; + my $rv = config_fh_parse($fh, "\0", "\n"); + close $fh or die "failed to close ($cmd) pipe: $?"; + $rv; +} + sub valid_inbox_name ($) { my ($name) = @_; -- cgit v1.2.3-24-ge0c7 From b41c19abcf0b0ac8a5f55678bfb0058ad50b3179 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 01:11:58 +0000 Subject: config: simplify lookup* methods This ensures we always process inboxes in section order and reduces the amount of code we have to maintain for each lookup. Avoiding the cost of inboxes object creation is not worth the code overhead; and we can implement a config cache via Storable easily for large configs and -mda users. 
--- lib/PublicInbox/Config.pm | 75 +++++++++-------------------------------------- 1 file changed, 14 insertions(+), 61 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index b7e03af3..2b99346a 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -63,58 +63,24 @@ sub new { $self; } +sub _fill_all ($) { each_inbox($_[0], sub {}) } + +sub _lookup_fill ($$$) { + my ($self, $cache, $key) = @_; + $self->{$cache}->{$key} // do { + _fill_all($self); + $self->{$cache}->{$key}; + } +} + sub lookup { my ($self, $recipient) = @_; - my $addr = lc($recipient); - my $ibx = $self->{-by_addr}->{$addr}; - return $ibx if $ibx; - - my $pfx; - - foreach my $k (keys %$self) { - $k =~ m!\A(publicinbox\.[^/]+)\.address\z! or next; - my $v = $self->{$k}; - if (ref($v) eq "ARRAY") { - foreach my $alias (@$v) { - (lc($alias) eq $addr) or next; - $pfx = $1; - last; - } - } else { - (lc($v) eq $addr) or next; - $pfx = $1; - last; - } - } - defined $pfx or return; - _fill($self, $pfx); + _lookup_fill($self, '-by_addr', lc($recipient)); } sub lookup_list_id { my ($self, $list_id) = @_; - $list_id = lc($list_id); - my $ibx = $self->{-by_list_id}->{$list_id}; - return $ibx if $ibx; - - my $pfx; - - foreach my $k (keys %$self) { - $k =~ /\A(publicinbox\.[\w-]+)\.listid\z/ or next; - my $v = $self->{$k}; - if (ref($v) eq "ARRAY") { - foreach my $alias (@$v) { - (lc($alias) eq $list_id) or next; - $pfx = $1; - last; - } - } else { - (lc($v) eq $list_id) or next; - $pfx = $1; - last; - } - } - defined $pfx or return; - _fill($self, $pfx); + _lookup_fill($self, '-by_list_id', lc($list_id)); } sub lookup_name ($$) { @@ -135,20 +101,7 @@ sub each_inbox { sub lookup_newsgroup { my ($self, $ng) = @_; - $ng = lc($ng); - my $ibx = $self->{-by_newsgroup}->{$ng}; - return $ibx if $ibx; - - foreach my $k (keys %$self) { - $k =~ m!\A(publicinbox\.[^/]+)\.newsgroup\z! 
or next; - my $v = $self->{$k}; - my $pfx = $1; - if ($v eq $ng) { - $ibx = _fill($self, $pfx); - return $ibx; - } - } - undef; + _lookup_fill($self, '-by_newsgroup', lc($ng)); } sub limiter { @@ -461,7 +414,7 @@ sub _fill { if ($ibx->{obfuscate}) { $ibx->{-no_obfuscate} = $self->{-no_obfuscate}; $ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re}; - each_inbox($self, sub {}); # noop to populate -no_obfuscate + _fill_all($self); # noop to populate -no_obfuscate } if (my $ibx_code_repos = $ibx->{coderepo}) { -- cgit v1.2.3-24-ge0c7 From fe3d294c237640bebf5d047c92921287a20d4485 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 01:18:54 +0000 Subject: config: avoid unnecessary '||' use '//' is available in Perl 5.10+ which allows `0' and `""' (empty string) to remain unclobbered. We also don't need '||=' for initializing our internal caches. --- lib/PublicInbox/Config.pm | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 2b99346a..e0329ebf 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -29,13 +29,13 @@ sub new { } bless $self, $class; # caches - $self->{-by_addr} ||= {}; - $self->{-by_list_id} ||= {}; - $self->{-by_name} ||= {}; - $self->{-by_newsgroup} ||= {}; - $self->{-no_obfuscate} ||= {}; - $self->{-limiters} ||= {}; - $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object + $self->{-by_addr} = {}; + $self->{-by_list_id} = {}; + $self->{-by_name} = {}; + $self->{-by_newsgroup} = {}; + $self->{-no_obfuscate} = {}; + $self->{-limiters} = {}; + $self->{-code_repos} = {}; # nick => PublicInbox::Git object $self->{-cgitrc_unparsed} = $self->{'publicinbox.cgitrc'}; if (my $no = delete $self->{'publicinbox.noobfuscate'}) { @@ -85,7 +85,7 @@ sub lookup_list_id { sub lookup_name ($$) { my ($self, $name) = @_; - $self->{-by_name}->{$name} || _fill($self, "publicinbox.$name"); + 
$self->{-by_name}->{$name} // _fill($self, "publicinbox.$name"); } sub each_inbox { @@ -106,7 +106,7 @@ sub lookup_newsgroup { sub limiter { my ($self, $name) = @_; - $self->{-limiters}->{$name} ||= do { + $self->{-limiters}->{$name} //= do { require PublicInbox::Qspawn; my $max = $self->{"publicinboxlimiter.$name.max"} || 1; my $limiter = PublicInbox::Qspawn::Limiter->new($max); @@ -115,7 +115,7 @@ sub limiter { }; } -sub config_dir { $ENV{PI_DIR} || "$ENV{HOME}/.public-inbox" } +sub config_dir { $ENV{PI_DIR} // "$ENV{HOME}/.public-inbox" } sub default_file { my $f = $ENV{PI_CONFIG}; @@ -206,8 +206,8 @@ sub cgit_repo_merge ($$$) { $self->{-cgit_remove_suffix} and $rel =~ s!/?\.git\z!!; } - $self->{"coderepo.$rel.dir"} ||= $path; - $self->{"coderepo.$rel.cgiturl"} ||= $rel; + $self->{"coderepo.$rel.dir"} //= $path; + $self->{"coderepo.$rel.cgiturl"} //= $rel; } sub is_git_dir ($) { @@ -338,7 +338,7 @@ sub _fill_code_repo { # cgit supports "/blob/?id=%s", but it's only a plain-text # display and requires an unabbreviated id= foreach my $t (qw(blob commit tag)) { - $git->{$t.'_url_format'} ||= map { + $git->{$t.'_url_format'} //= map { "$_/$t/?id=%s" } @$cgits; } @@ -426,7 +426,7 @@ sub _fill { $valid += valid_inbox_name($_) foreach (@parts); $valid == scalar(@parts) or next; - my $repo = $code_repos->{$nick} ||= + my $repo = $code_repos->{$nick} //= _fill_code_repo($self, $nick); push @$repo_objs, $repo if $repo; } -- cgit v1.2.3-24-ge0c7 From 849f57851a04e376cf2327d9e98e8128bf3c756e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 01:38:25 +0000 Subject: config: allow "0" as a valid mainrepo path It's probably wrong to use relative path names, but things are all relative these days anyways with shared and networked FSes. 
--- lib/PublicInbox/Config.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index e0329ebf..509de0a0 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -93,7 +93,7 @@ sub each_inbox { # may auto-vivify if config file is non-existent: foreach my $section (@{$self->{-section_order}}) { next if $section !~ m!\Apublicinbox\.([^/]+)\z!; - $self->{"publicinbox.$1.mainrepo"} or next; + defined($self->{"publicinbox.$1.mainrepo"}) or next; my $ibx = lookup_name($self, $1) or next; $cb->($ibx); } -- cgit v1.2.3-24-ge0c7 From 7e881ac8227d1882c92de6f6701ffcba7cef9191 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 03:40:19 +0000 Subject: mda, watch: wire up List-ID header support This also adds watchheader tests for -watch, which we never had before :x --- lib/PublicInbox/WatchMaildir.pm | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index f63140c8..08b1aab4 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -59,9 +59,19 @@ sub new { my $watch = $ibx->{watch} or return; if (is_maildir($watch)) { - if (my $wm = $ibx->{watchheader}) { - my ($k, $v) = split(/:/, $wm, 2); - $ibx->{-watchheader} = [ $k, qr/\Q$v\E/ ]; + my $watch_hdrs = []; + if (my $wh = $ibx->{watchheader}) { + my ($k, $v) = split(/:/, $wh, 2); + push @$watch_hdrs, [ $k, qr/\Q$v\E/ ]; + } + if (my $list_ids = $ibx->{listid}) { + for (@$list_ids) { + my $re = qr/<[ \t]*\Q$_\E[ \t]*>/; + push @$watch_hdrs, ['List-Id', $re ]; + } + } + if (scalar @$watch_hdrs) { + $ibx->{-watchheaders} = $watch_hdrs; } my $new = "$watch/new"; my $cur = "$watch/cur"; @@ -159,10 +169,17 @@ sub _try_path { my $mime = _path_to_mime($path) or next; my $im = _importer_for($self, $ibx); - my $wm = $ibx->{-watchheader}; 
- if ($wm) { - my $v = $mime->header_obj->header_raw($wm->[0]); - next unless ($v && $v =~ $wm->[1]); + # any header match means it's eligible for the inbox: + if (my $watch_hdrs = $ibx->{-watchheaders}) { + my $ok; + my $hdr = $mime->header_obj; + for my $wh (@$watch_hdrs) { + my $v = $hdr->header_raw($wh->[0]); + next unless defined($v) && $v =~ $wh->[1]; + $ok = 1; + last; + } + next unless $ok; } if (my $scrub = $ibx->filter($im)) { -- cgit v1.2.3-24-ge0c7 From a7603ca1d219f39101f8e35bdea1f53ce5c31796 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2019 06:39:07 +0000 Subject: wwwtext: show listid config directive(s) We want to share this piece for potential mirror-ers just like watchheader. --- lib/PublicInbox/WwwText.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 491f7e63..bcda665c 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -147,7 +147,7 @@ sub inbox_config ($$$) { [publicinbox "$name"] mainrepo = /path/to/top-level-inbox EOS - for my $k (qw(address)) { + for my $k (qw(address listid)) { defined(my $v = $ibx->{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } -- cgit v1.2.3-24-ge0c7