4 files changed, 74 insertions, 13 deletions
diff --git a/Documentation/dc-dlvr-spam-flow.txt b/Documentation/dc-dlvr-spam-flow.txt
new file mode 100644
index 00000000..2cdcefa5
--- /dev/null
+++ b/Documentation/dc-dlvr-spam-flow.txt
@@ -0,0 +1,38 @@
+dc-dlvr spam/ham training system flow
+-------------------------------------
+
+An overview of the Maildir + inotify-based spam training system Eric
+uses on his mail server.  This idea may be implemented for kqueue-based
+systems, too.
+
+The idea is to use inotify (via incron) to watch for new files appearing
+in Maildirs.  We only want to train seen messages as ham, and old (but
+not necessarily seen) messages as spam.  The overall goal of this is to
+allow a user to train their filters without leaving their favorite mail
+user agent.
+
+Every message written to Maildir involves a rename, so we only
+have incron watch for IN_MOVED_TO events.
+
+The generic flow is as follows, all for a single Unix user account:
+
+    incron -> report-spam +-> sendmail -> MTA -> dc-dlvr -> spamc -> spamd
+                          |
+                          V
+                         ...
+
+For public-inbox, Eric uses a separate Unix account ("pi") to add a
+layer of protection from fat-fingering something.  So his report-spam
+script delivers to a second recipient for training, the "pi" user:
+                         ...
+                          |
+                          +-> sendmail -> MTA -> dc-dlvr
+                                                    |
+                                                    V
+                                            ~pi/.dc-dlvr.pre
+                                                    |
+                                                    V
+                                           public-inbox-learn
+
+public-inbox-learn will then internally handle the "spamc -> spamd"
+delivery path as well as calling ssoma-rm on falsely trained messages.
diff --git a/scripts/dc-dlvr b/scripts/dc-dlvr
index 68123f84..ca64505c 100755
--- a/scripts/dc-dlvr
+++ b/scripts/dc-dlvr
@@ -1,6 +1,7 @@
  #!/bin/sh
  # Copyright (C) 2008-2013, Eric Wong <e@80x24.org>
  # License: GPLv3 or later <http://www.gnu.org/licenses/gpl-3.0.txt>
+# This is installed as /etc/dc-dlvr on my system
  # to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION"
  DELIVER=/usr/lib/dovecot/deliver
  
@@ -11,7 +12,7 @@ catchall) exec $DELIVER ;;
  esac
  
  # change if your spamc/spamd listens elsewhere
-spamc='spamc -U /run/spamd.sock'
+spamc='spamc'
  
  # allow plus addressing to train spam filters, $1 is the $EXTENSION
  # which may be "trainspam" or "trainham".  Only allow spam training
@@ -30,11 +31,14 @@ then
          set -e
          cat > $TMPMSG
          DEFAULT_INBOX=$(. ~/.dc-dlvr.pre)
-        if test xINBOX != x"$DEFAULT_INBOX"
-        then
+        case $DEFAULT_INBOX in
+        '') exec rm -f $rm_list ;;
+        INBOX) ;; # do nothing
+        *)
                  $DELIVER -m $DEFAULT_INBOX < $TMPMSG
                  exec rm -f $rm_list
-        fi
+                ;;
+        esac
          PREMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1)
          rm_list="$rm_list $PREMSG"
          set +e
diff --git a/scripts/dc-dlvr.pre b/scripts/dc-dlvr.pre
new file mode 100644
index 00000000..9183a96e
--- /dev/null
+++ b/scripts/dc-dlvr.pre
@@ -0,0 +1,12 @@
+# Copyright (C) 2014, Eric Wong <e@80x24.org>
+# License: AGPLv3 or later <http://www.gnu.org/licenses/agpl-3.0.txt>
+# sourced by /etc/dc-dlvr as ~$PI_USER/.dc-dlvr.pre; this just exits,
+# aborting /etc/dc-dlvr
+export PATH=/usr/local/bin:/usr/bin:/bin
+exec 2>> ~/log/dc-dlvr.pre.err
+trap 'err=$?; set +e; test $err -eq 0 || rm -f $TMPMSG; exit $err' EXIT
+case $1,$CLIENT_ADDRESS in
+pispam,) exec public-inbox-learn spam < $TMPMSG ;;
+piham,) exec public-inbox-learn ham < $TMPMSG ;;
+esac
+exec public-inbox-mda < $TMPMSG
diff --git a/scripts/report-spam b/scripts/report-spam
index 75200431..0015ef0b 100755
--- a/scripts/report-spam
+++ b/scripts/report-spam
@@ -1,12 +1,11 @@
  #!/bin/sh
-# Copyright (C) 2008-2013, Eric Wong <e@80x24.org>
+# Copyright (C) 2008-2014, Eric Wong <e@80x24.org>
  # License: GPLv3 or later <http://www.gnu.org/licenses/gpl-3.0.txt>
  # Usage: report-spam /path/to/message/in/maildir
-# This is intended to be used with incron or similar systems.
+# This is intended for use with incron or similar systems.
  # my incrontab(5) looks like this:
-#  /path/to/.maildir/cur IN_MOVED_TO /path/to/report-spam $@/$#
-#  /path/to/.maildir/.INBOX.good/cur IN_MOVED_TO /path/to/report-spam $@/$#
-#  /path/to/.maildir/.INBOX.spam/cur IN_MOVED_TO /path/to/report-spam $@/$#
+#  /path/to/maildir/.INBOX.good/cur IN_MOVED_TO /path/to/report-spam $@/$#
+#  /path/to/maildir/.INBOX.spam/cur IN_MOVED_TO /path/to/report-spam $@/$#
  
  # gigantic emails tend not to be spam (but they suck anyways...)
  bytes=$(stat -c %s $1)
@@ -21,18 +20,26 @@ fi
  # incrond has no concurrency limits and will fork a new process on
  # every single event, which sucks with rename storms when a client
  # commits folder changes.  The sendmail executable exits quickly and
-# queues up the message for training.  This shoudl also ensure fairness
+# queues up the message for training.  This should also ensure fairness
  # to newly arriving mail.  Instead of installing/configuring
  # another queueing system, I reuse the queue in the MTA.
-# See scripts/dc-dlvr for corresponding trainspam/trainham handlers.
+# See scripts/dc-dlvr for corresponding trainspam/trainham handlers,
+# which are for my personal bayes training, and scripts/dc-dlvr.pre
+# for the pispam/piham handlers for training emails going to public-inbox.
+
+DO_SENDMAIL='/usr/sbin/sendmail -oi'
+PI_USER=pi
+
  case $1 in
  *[/.]spam/cur/*) # non-new messages in spam get trained
-        exec /usr/sbin/sendmail -oem -oi $USER+trainspam < $1
+        $DO_SENDMAIL $PI_USER+pispam < $1
+        exec $DO_SENDMAIL $USER+trainspam < $1
          ;;
  *:2,*S*) # otherwise, seen messages only
          case $1 in
          *:2,*T*) exit 0 ;; # ignore trashed messages
          esac
-        exec /usr/sbin/sendmail -oem -oi $USER+trainham < $1
+        $DO_SENDMAIL $PI_USER+piham < $1
+        exec $DO_SENDMAIL $USER+trainham < $1
          ;;
  esac