about summary refs log tree commit homepage
path: root/script/public-inbox-learn
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-10-28 10:45:16 +0000
committerEric Wong <e@80x24.org>2019-10-30 08:48:08 +0000
commitbc6b44cd7feee7e83ef1dcf26092808f92f757d9 (patch)
tree5ed6648e7b57c503c44f98a3aab78878e601c879 /script/public-inbox-learn
parent83470f5e60e44d7f70b378f4b250c6584f42f64e (diff)
downloadpublic-inbox-bc6b44cd7feee7e83ef1dcf26092808f92f757d9.tar.gz
It's assumed that "spam" can end up anywhere due to Bcc:, so we
need to scan every single inbox.  However, "rm" is usually more
targeted and "ham" obviously only belongs in some inboxes.
Diffstat (limited to 'script/public-inbox-learn')
-rwxr-xr-xscript/public-inbox-learn71
1 files changed, 35 insertions, 36 deletions
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 8ff1652b..d2d665d5 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -39,17 +39,7 @@ my $mime = PublicInbox::MIME->new(eval {
         $data
 });
 
-# get all recipients
-my %dests;
-foreach my $h (qw(Cc To)) {
-        my @val = $mime->header($h) or next;
-        for (@val) {
-                foreach my $email (PublicInbox::Address::emails($_)) {
-                        $dests{lc($email)} = 1;
-                }
-        }
-}
-
+# spam is removed from all known inboxes
 if ($train eq 'spam') {
         $pi_config->each_inbox(sub {
                 my ($ibx) = @_;
@@ -58,36 +48,45 @@ if ($train eq 'spam') {
                 $im->remove($mime, 'spam');
                 $im->done;
         });
-}
+} else {
+        require PublicInbox::MDA if $train eq "ham";
 
-require PublicInbox::MDA if $train eq "ham";
+        # get all recipients
+        my %dests; # address => <PublicInbox::Inbox|0(false)>
+        for ($mime->header('Cc'), $mime->header('To')) {
+                foreach my $addr (PublicInbox::Address::emails($_)) {
+                        $addr = lc($addr);
+                        $dests{$addr} //= $pi_config->lookup($addr) // 0;
+                }
+        }
 
-# n.b. message may be cross-posted to multiple public-inboxes
-foreach my $recipient (keys %dests) {
-        my $dst = $pi_config->lookup($recipient) or next;
-        # We do not touch GIT_COMMITTER_* env here so we can track
-        # who trained the message.
-        $dst->{name} = $ENV{GIT_COMMITTER_NAME} || $dst->{name};
-        $dst->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} || $recipient;
-        $dst = PublicInbox::InboxWritable->new($dst);
-        my $im = $dst->importer(0);
+        # n.b. message may be cross-posted to multiple public-inboxes
+        while (my ($addr, $dst) = each %dests) {
+                next unless ref($dst);
+                # We do not touch GIT_COMMITTER_* env here so we can track
+                # who trained the message.
+                $dst->{name} = $ENV{GIT_COMMITTER_NAME} || $dst->{name};
+                $dst->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} || $addr;
+                $dst = PublicInbox::InboxWritable->new($dst);
+                my $im = $dst->importer(0);
 
-        if ($train eq "spam" || $train eq "rm") {
-                # This needs to be idempotent, as my inotify trainer
-                # may train for each cross-posted message, and this
-                # script already learns for every list in
-                # ~/.public-inbox/config
-                $im->remove($mime, $train);
-        } else { # $train eq "ham"
-                # no checking for spam here, we assume the message has
-                # been reviewed by a human at this point:
-                PublicInbox::MDA->set_list_headers($mime, $dst);
+                if ($train eq "rm") {
+                        # This needs to be idempotent, as my inotify trainer
+                        # may train for each cross-posted message, and this
+                        # script already learns for every list in
+                        # ~/.public-inbox/config
+                        $im->remove($mime, $train);
+                } elsif ($train eq "ham") {
+                        # no checking for spam here, we assume the message has
+                        # been reviewed by a human at this point:
+                        PublicInbox::MDA->set_list_headers($mime, $dst);
 
-                # Ham messages are trained when they're marked into
-                # a SEEN state, so this is idempotent:
-                $im->add($mime);
+                        # Ham messages are trained when they're marked into
+                        # a SEEN state, so this is idempotent:
+                        $im->add($mime);
+                }
+                $im->done;
         }
-        $im->done;
 }
 
 if ($err) {