From d002f24a9648d1499a16ed4dec84f05c0f849740 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 11 Nov 2023 09:04:57 +0000 Subject: mda|learn|watch: support dropUniqueUnsubscribe config List-Unsubscribe headers with unique identifiers (such as those generated by our examples/unsubscribe.milter) should not end up in public archives. Add a new config knob to strip List-Unsubscribe headers if they have the `List-Unsubscribe-Post: List-Unsubscribe=One-Click' header. Unfortunately, this breaks DKIM signatures if the signature covers either of these List-Unsubscribe* headers. However, breaking DKIM is the lesser evil compared to any archive reader being able to stop archival by an independent archivist. As much as I would like this to be the default, it probably affects few users at the moment since very few mailing lists use unique identifiers in List-Unsubscribe (but that number has grown, recently). --- lib/PublicInbox/Import.pm | 27 +++++++++++++++++++++++++++ lib/PublicInbox/LeiToMail.pm | 6 ++++++ lib/PublicInbox/Watch.pm | 1 + 3 files changed, 34 insertions(+) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 2d60db55..e4f8615e 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -321,11 +321,38 @@ sub extract_cmt_info ($;$) { # kill potentially confusing/misleading headers our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length), qw(Status X-Status)); +our $DROP_UNIQUE_UNSUB; sub drop_unwanted_headers ($) { my ($eml) = @_; for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) { $eml->header_set($_); } + + # We don't want public-inbox readers to be able to unsubscribe the + # address which does archiving. WARNING: this breaks DKIM if the + # mailing list sender follows RFC 8058, section 4; but breaking DKIM + # (or have senders ignore RFC 8058 sec. 
4) is preferable to having + # saboteurs unsubscribing independent archivists: + if ($DROP_UNIQUE_UNSUB && grep(/\AList-Unsubscribe=One-Click\z/, + $eml->header_raw('List-Unsubscribe-Post'))) { + for (qw(List-Unsubscribe-Post List-Unsubscribe)) { + $eml->header_set($_) + } + } +} + +sub load_config ($;$) { + my ($cfg, $do_exit) = @_; + my $v = $cfg->{lc 'publicinboxImport.dropUniqueUnsubscribe'}; + if (defined $v) { + $DROP_UNIQUE_UNSUB = $cfg->git_bool($v) // do { + warn <<EOM; +E: publicinboxImport.dropUniqueUnsubscribe=$v in $cfg->{-f} is not boolean +EOM + $do_exit //= \&CORE::exit; + $do_exit->(78); # EX_CONFIG + }; + } } # used by V2Writable, too diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index b73af68a..0d2f586a 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -10,6 +10,7 @@ use PublicInbox::Eml; use PublicInbox::IO; use PublicInbox::Git; use PublicInbox::Spawn qw(spawn); +use PublicInbox::Import; use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); use PublicInbox::Syscall qw(rename_noreplace); @@ -672,6 +673,11 @@ sub _pre_augment_v2 { }); } PublicInbox::InboxWritable->new($ibx, @creat); + local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # only for workers + PublicInbox::Import::load_config(PublicInbox::Config->new, sub { + $lei->x_it(shift); + die "E: can't write v2 inbox with broken config\n"; + }); $ibx->init_inbox if @creat; my $v2w = $ibx->importer; $v2w->wq_workers_start("lei/v2w $dir", 1, $lei->oldset, {lei => $lei}, diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1cdf12a5..5253ec94 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -45,6 +45,7 @@ sub new { my (%mdmap); my (%imap, %nntp); # url => [inbox objects] or 'watchspam' my (@imap, @nntp); + PublicInbox::Import::load_config($cfg); # "publicinboxwatch" is the documented namespace # "publicinboxlearn" is legacy but may be supported -- cgit v1.2.3-24-ge0c7