about summary refs log tree commit homepage
path: root/lib/PublicInbox/Import.pm
diff options
context:
space:
mode:
author Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-22 18:21:54 +0000
committer Eric Wong (Contractor, The Linux Foundation) <e@80x24.org> 2018-03-22 18:21:54 +0000
commit f6285ab9d73a4eae490dda325096e61eadc415cd (patch)
tree e9f975293c995d6d65e1aa7264fb6aaf0fadbd29 /lib/PublicInbox/Import.pm
parent 2eea27a1e97ce522e3e1e63499a2bf7e472d8ee9 (diff)
download public-inbox-f6285ab9d73a4eae490dda325096e61eadc415cd.tar.gz
This also quiets down warnings from -watch when spam training
happens on messages without Message-Id.
Diffstat (limited to 'lib/PublicInbox/Import.pm')
-rw-r--r-- lib/PublicInbox/Import.pm 31
1 files changed, 24 insertions, 7 deletions
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 5d116a1c..6824faca 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -203,7 +203,7 @@ sub remove {
         my ($r, $w) = $self->gfi_start;
         my $tip = $self->{tip};
         if ($path_type eq '2/38') {
-                $path = mid2path(mid_mime($mime));
+                $path = mid2path(v1_mid0($mime));
                 ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
                 return ($err, $cur) if $err;
         } else {
@@ -296,6 +296,28 @@ sub drop_unwanted_headers ($) {
         $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS;
 }
 
+# used by V2Writable, too
+sub prepend_mid ($$) { # put "<$mid0>" ahead of any existing Message-Id values on $hdr
+        my ($hdr, $mid0) = @_;
+        # @cur is likely empty if we need to call this sub, but it could
+        # hold unparseable raw values, which we preserve after the new one.
+        my @cur = $hdr->header_raw('Message-Id');
+        $hdr->header_set('Message-Id', "<$mid0>", @cur); # new ID first, old raw values after
+}
+
+sub v1_mid0 ($) { # returns the first entry from mids(), or a generated ID if there is none
+        my ($mime) = @_;
+        my $hdr = $mime->header_obj;
+        my $mids = mids($hdr); # array ref; may be empty
+
+        if (!scalar(@$mids)) { # spam often has no Message-Id
+                my $mid0 = digest2mid(content_digest($mime)); # fallback ID built from content_digest()
+                prepend_mid($hdr, $mid0); # set the generated ID on $hdr as well
+                return $mid0;
+        }
+        $mids->[0]; # implicit return value: first element of @$mids
+}
+
 # returns undef on duplicate
 # returns the :MARK of the most recent commit
 sub add {
@@ -313,12 +335,7 @@ sub add {
 
         my $path;
         if ($path_type eq '2/38') {
-                my $mids = mids($mime->header_obj);
-                if (!scalar(@$mids)) {
-                        my $dig = content_digest($mime);
-                        @$mids = (digest2mid($dig));
-                }
-                $path = mid2path($mids->[0]);
+                $path = mid2path(v1_mid0($mime));
         } else { # v2 layout, one file:
                 $path = 'm';
         }