about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-11-11 09:04:57 +0000
committerEric Wong <e@80x24.org>2023-11-11 21:20:43 +0000
commitd002f24a9648d1499a16ed4dec84f05c0f849740 (patch)
tree2d1b9ca2913e667b6bc7bd893bf5b45f24ba22de /lib/PublicInbox
parent13d21e1701fe5ff07b1f8017ba54f92965ac1c7a (diff)
downloadpublic-inbox-d002f24a9648d1499a16ed4dec84f05c0f849740.tar.gz
List-Unsubscribe headers with unique identifiers (such as those
generated by our examples/unsubscribe.milter) should not
end up in public archives.  Add a new config knob to strip
List-Unsubscribe headers if they have the
`List-Unsubscribe-Post: List-Unsubscribe=One-Click'
header.

Unfortunately, this breaks DKIM signatures if the signature
covers either of these List-Unsubscribe* headers.  However,
breaking DKIM is the lesser evil compared to any archive reader
being able to stop archival by an independent archivist.

As much as I would like this to be the default, it probably
affects few users at the moment since very few mailing lists
use unique identifiers in List-Unsubscribe (but that number
has grown, recently).
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/Import.pm27
-rw-r--r--lib/PublicInbox/LeiToMail.pm6
-rw-r--r--lib/PublicInbox/Watch.pm1
3 files changed, 34 insertions, 0 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 2d60db55..e4f8615e 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -321,11 +321,38 @@ sub extract_cmt_info ($;$) {
 # kill potentially confusing/misleading headers
 our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length),
                         qw(Status X-Status));
+our $DROP_UNIQUE_UNSUB;
 sub drop_unwanted_headers ($) {
         my ($eml) = @_;
         for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) {
                 $eml->header_set($_);
         }
+
+        # We don't want public-inbox readers to be able to unsubcribe the
+        # address which does archiving.  WARNING: this breaks DKIM if the
+        # mailing list sender follows RFC 8058, section 4; but breaking DKIM
+        # (or have senders ignore RFC 8058 sec. 4) is preferable to having
+        # saboteurs unsubscribing independent archivists:
+        if ($DROP_UNIQUE_UNSUB && grep(/\AList-Unsubscribe=One-Click\z/,
+                                $eml->header_raw('List-Unsubscribe-Post'))) {
+                for (qw(List-Unsubscribe-Post List-Unsubscribe)) {
+                        $eml->header_set($_)
+                }
+        }
+}
+
+sub load_config ($;$) {
+        my ($cfg, $do_exit) = @_;
+        my $v = $cfg->{lc 'publicinboxImport.dropUniqueUnsubscribe'};
+        if (defined $v) {
+                $DROP_UNIQUE_UNSUB = $cfg->git_bool($v) // do {
+                        warn <<EOM;
+E: publicinboxImport.dropUniqueUnsubscribe=$v in $cfg->{-f} is not boolean
+EOM
+                        $do_exit //= \&CORE::exit;
+                        $do_exit->(78); # EX_CONFIG
+                };
+        }
 }
 
 # used by V2Writable, too
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index b73af68a..0d2f586a 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -10,6 +10,7 @@ use PublicInbox::Eml;
 use PublicInbox::IO;
 use PublicInbox::Git;
 use PublicInbox::Spawn qw(spawn);
+use PublicInbox::Import;
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
 use PublicInbox::Syscall qw(rename_noreplace);
@@ -672,6 +673,11 @@ sub _pre_augment_v2 {
                 });
         }
         PublicInbox::InboxWritable->new($ibx, @creat);
+        local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # only for workers
+        PublicInbox::Import::load_config(PublicInbox::Config->new, sub {
+                $lei->x_it(shift);
+                die "E: can't write v2 inbox with broken config\n";
+        });
         $ibx->init_inbox if @creat;
         my $v2w = $ibx->importer;
         $v2w->wq_workers_start("lei/v2w $dir", 1, $lei->oldset, {lei => $lei},
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 1cdf12a5..5253ec94 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -45,6 +45,7 @@ sub new {
         my (%mdmap);
         my (%imap, %nntp); # url => [inbox objects] or 'watchspam'
         my (@imap, @nntp);
+        PublicInbox::Import::load_config($cfg);
 
         # "publicinboxwatch" is the documented namespace
         # "publicinboxlearn" is legacy but may be supported