learn: use "spam" as subject for removal commits

Sometimes a removal is an innocent "rm" of a misdirected or off-topic post, while most removed messages are "spam". Record the reason as the subject of the removal commit so that anybody looking at the history can easily distinguish why a message was removed.
author: Eric Wong <e@80x24.org> 2017-11-16 18:48:39 +0000
committer: Eric Wong <e@80x24.org> 2017-11-16 18:48:39 +0000
commit: 7174681c0165008ac16ed1d323f6d95b6d0570a3 (patch)
tree: bc0bb3dcc0e8cb2fe3b8f5a9e3205651a2df46dd
parent: b223e6f49debb99b9132bc85d97a065ebcee00b9 (diff)
download: public-inbox-7174681c0165008ac16ed1d323f6d95b6d0570a3.tar.gz
2 files changed, 5 insertions, 3 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 31dc7695..8c9d1cba 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -90,7 +90,7 @@ sub _check_path ($$$$) {
  # ('MISMATCH', msg) on mismatch
  # (:MARK, msg) on success
  sub remove {
-        my ($self, $mime) = @_; # mime = Email::MIME
+        my ($self, $mime, $msg) = @_; # mime = Email::MIME
  
          my $mid = mid_mime($mime);
          my $path = mid2path($mid);
@@ -138,10 +138,12 @@ sub remove {
          }
          my $ident = $self->{ident};
          my $now = now2822();
+        $msg ||= 'rm';
+        my $len = length($msg) + 1;
          print $w "commit $ref\nmark :$commit\n",
                  "author $ident $now\n",
                  "committer $ident $now\n",
-                "data 3\nrm\n\n",
+                "data $len\n$msg\n\n",
                  'from ', ($parent ? $parent : $tip), "\n" or wfail;
          print $w "D $path\n\n" or wfail;
          $self->{nchg}++;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 71aa50f9..75294a0a 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -81,7 +81,7 @@ foreach my $recipient (keys %dests) {
                  # may train for each cross-posted message, and this
                  # script already learns for every list in
                  # ~/.public-inbox/config
-                $im->remove($mime);
+                $im->remove($mime, $train);
          } else { # $train eq "ham"
                  # no checking for spam here, we assume the message has
                  # been reviewed by a human at this point:
author	Eric Wong <e@80x24.org>	2017-11-16 18:48:39 +0000
committer	Eric Wong <e@80x24.org>	2017-11-16 18:48:39 +0000
commit	7174681c0165008ac16ed1d323f6d95b6d0570a3 (patch)
tree	bc0bb3dcc0e8cb2fe3b8f5a9e3205651a2df46dd
parent	b223e6f49debb99b9132bc85d97a065ebcee00b9 (diff)
download	public-inbox-7174681c0165008ac16ed1d323f6d95b6d0570a3.tar.gz