about summary refs log tree commit homepage
path: root/scripts
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2014-01-09 23:13:37 +0000
committerEric Wong <e@80x24.org>2014-01-09 22:37:54 +0000
commit3e96cf129ba5fc2834b691314c504aa363fd5cf4 (patch)
treedd5c42532049bc5dd2a420126edb9f07e9a5b9a0 /scripts
downloadpublic-inbox-3e96cf129ba5fc2834b691314c504aa363fd5cf4.tar.gz
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/dc-dlvr64
-rwxr-xr-xscripts/import_gmane_spool51
-rwxr-xr-xscripts/report-spam28
3 files changed, 143 insertions, 0 deletions
diff --git a/scripts/dc-dlvr b/scripts/dc-dlvr
new file mode 100755
index 00000000..96009662
--- /dev/null
+++ b/scripts/dc-dlvr
@@ -0,0 +1,83 @@
+#!/bin/sh
+# Copyright (C) 2008-2013, Eric Wong <e@80x24.org>
+# License: GPLv3 or later
+# to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION"
+#
+# Reads one message on stdin, filters it through spamc and hands the
+# result to dovecot deliver.  Optional per-user hooks:
+#   ~/.dc-dlvr.pre  sourced before spam filtering; its stdout names the
+#                   mailbox, anything but "INBOX" is delivered unfiltered
+#   ~/.dc-dlvr.rc   sourced in place of the default INBOX delivery for
+#                   non-spam; the filtered message is in $TMPMSG
+DELIVER=/usr/lib/dovecot/deliver
+
+# my personal preference is to use a catchall account to avoid generating
+# backscatter, as invalid emails are usually spam
+case $USER in
+catchall) exec $DELIVER ;;
+esac
+
+# change if your spamc/spamd listens elsewhere
+spamc='spamc -U /run/spamd.sock'
+
+# allow plus addressing to train spam filters, $1 is the $EXTENSION
+# which may be "trainspam" or "trainham".  Only allow spam training
+# when $CLIENT_ADDRESS is empty (local client)
+case $1,$CLIENT_ADDRESS in
+trainspam,) exec $spamc -L spam > /dev/null 2>&1 ;;
+trainham,) exec $spamc -L ham > /dev/null 2>&1 ;;
+esac
+
+# "|| exit" must sit outside $(...): inside, it only leaves the subshell
+# and the script would carry on with an empty $TMPMSG
+TMPMSG=$(mktemp -t "dc-dlvr.orig.$USER.XXXXXX") || exit 1
+
+# space-separated list of temporary files, expanded unquoted on purpose
+rm_list=$TMPMSG
+
+# do not leave copies of mail behind when "set -e" aborts a delivery
+trap 'rm -f $rm_list' EXIT
+
+# pre-filter, for infrequently read lists which do their own spam filtering:
+if test -r ~/.dc-dlvr.pre
+then
+        set -e
+        cat > "$TMPMSG"
+        DEFAULT_INBOX=$(. ~/.dc-dlvr.pre)
+        if test xINBOX != x"$DEFAULT_INBOX"
+        then
+                $DELIVER -m "$DEFAULT_INBOX" < "$TMPMSG"
+                exec rm -f $rm_list
+        fi
+        PREMSG=$(mktemp -t "dc-dlvr.orig.$USER.XXXXXX") || exit 1
+        rm_list="$rm_list $PREMSG"
+
+        # copy rather than rename: $TMPMSG stays the file mktemp created
+        # (same inode and mode) instead of being re-created by a redirect
+        cat "$TMPMSG" > "$PREMSG"
+
+        # the exit status of spamc is collected in $err below, so it must
+        # not abort the script
+        set +e
+        $spamc -E --headers < "$PREMSG" > "$TMPMSG"
+else
+        $spamc -E --headers > "$TMPMSG"
+fi
+err=$?
+
+# normal delivery
+set -e
+
+case $err in
+1) $DELIVER -m INBOX.spam < "$TMPMSG" ;;
+*)
+        # users may override normal delivery and have it go elsewhere
+        if test -r ~/.dc-dlvr.rc
+        then
+                . ~/.dc-dlvr.rc
+        else
+                $DELIVER -m INBOX < "$TMPMSG"
+        fi
+        ;;
+esac
+
+exec rm -f $rm_list
diff --git a/scripts/import_gmane_spool b/scripts/import_gmane_spool
new file mode 100755
index 00000000..b5573e15
--- /dev/null
+++ b/scripts/import_gmane_spool
@@ -0,0 +1,82 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# One-off script to convert an slrnpull news spool from gmane
+#
+# Each article (a file whose name is all digits) in the spool directory
+# has its gmane header rewrites undone and is piped to public-inbox-mda
+# from a forked worker process.
+use strict;
+use warnings;
+use Parallel::ForkManager;
+use Email::Simple;
+use PublicInbox::Filter;
+use IPC::Run qw(run);
+my $usage = "import_gmane_spool SLRNPULL_ROOT/news/foo/bar MAIN_REPO FAIL_REPO";
+my $spool = shift @ARGV or die "Usage: $usage\n";
+my $main_repo = shift @ARGV or die "Usage: $usage\n";
+my $fail_repo = shift @ARGV or die "Usage: $usage\n";
+
+# nproc output ends in a newline and is empty when the tool is missing;
+# keep only a positive integer and otherwise fall back to 4 workers
+my ($detected) = (`nproc 2>/dev/null` || '') =~ /\A([1-9]\d*)/;
+my $nproc = $detected || 4;
+my $pm = Parallel::ForkManager->new($nproc);
+my @args = ('public-inbox-mda', $main_repo, $fail_repo);
+
+# Undo gmane's header rewrites in the article at $path and pipe the result
+# to the command given in @cmd.  Warns and returns if the article cannot be
+# opened or is empty; dies if the command cannot be run or exits non-zero.
+sub import_article {
+        my ($path, @cmd) = @_;
+        my $fh;
+        unless (open $fh, '<', $path) {
+                warn "Failed to open $path: $!\n";
+                return;
+        }
+        my $raw = do { local $/; <$fh> };
+        close $fh or die "close failed: $!\n";
+        unless (defined $raw && length $raw) {
+                warn "Skipping empty $path\n";
+                return;
+        }
+        my $s = Email::Simple->new($raw);
+
+        # gmane rewrites Received headers, which increases spamminess
+        my @h = $s->header("Original-Received");
+        if (@h) {
+                $s->header_set("Received", @h);
+                $s->header_set("Original-Received");
+        }
+
+        # triggers for the SA HEADER_SPAM rule
+        foreach my $drop (qw(Approved)) { $s->header_set($drop) }
+
+        # appears to be an old gmane bug:
+        $s->header_set("connect()");
+
+        my $orig = $s->as_string;
+        eval { run(\@cmd, \$orig) };
+
+        # look at $@ first: $? may be stale if run() died before the
+        # command was reaped
+        die "fail $path: $@\n" if $@;
+        die "fail $path: $?\n" if $?;
+        return;
+}
+
+# readdir rather than glob: a glob pattern built from $spool is split on
+# whitespace and expands wildcard characters in the directory name
+opendir(my $dh, $spool) or die "Failed to open $spool: $!\n";
+my @articles = sort { $a <=> $b } grep { /\A\d+\z/ } readdir($dh);
+closedir($dh);
+
+foreach my $article (@articles) {
+        # start() returns the child pid in the parent and 0 in the child
+        $pm->start and next;
+        import_article("$spool/$article", @args);
+        $pm->finish;
+}
+
+$pm->wait_all_children;
diff --git a/scripts/report-spam b/scripts/report-spam
new file mode 100755
index 00000000..825855b5
--- /dev/null
+++ b/scripts/report-spam
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Copyright (C) 2008-2013, Eric Wong <e@80x24.org>
+# License: GPLv3 or later
+# Usage: report-spam /path/to/message/in/maildir
+# my incrontab(5) looks like this:
+#  /path/to/.maildir/cur IN_MOVED_TO /path/to/report-spam $@/$#
+#  /path/to/.maildir/.INBOX.good/cur IN_MOVED_TO /path/to/report-spam $@/$#
+#  /path/to/.maildir/.INBOX.spam/cur IN_MOVED_TO /path/to/report-spam $@/$#
+#
+# Mails the message to $USER+trainspam or $USER+trainham, picked from the
+# folder and the maildir flags in the path.
+
+if test $# -lt 1
+then
+        echo >&2 "Usage: report-spam /path/to/message/in/maildir"
+        exit 1
+fi
+
+# gigantic emails tend not to be spam (but they suck anyways...)
+# bail out if the size cannot be read (e.g. the message was moved again)
+bytes=$(stat -c %s "$1") || exit 1
+if test "$bytes" -gt 512000
+then
+        exit
+fi
+
+# only tested with the /usr/sbin/sendmail which ships with postfix
+case $1 in
+*[/.]spam/cur/*) # non-new messages in spam get trained
+        exec /usr/sbin/sendmail -oem -oi "$USER+trainspam" < "$1"
+        ;;
+*:2,*S*) # otherwise, seen messages only
+        case $1 in
+        *:2,*T*) exit 0 ;; # ignore trashed messages
+        esac
+        exec /usr/sbin/sendmail -oem -oi "$USER+trainham" < "$1"
+        ;;
+esac