From 12c3eb5b2b0860292a32d5743ea0157996e9a4b9 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Mon, 19 Mar 2018 08:14:46 +0000 Subject: import: force Message-ID generation for v1 here This allows us to share code for generating Message-IDs between v1 and v2 repos. For v1, this introduces a slight incompatibility in message removal iff the original message lacked a Message-ID AND the training request came from a message which did not pass through the public-inbox: The workaround for this would be to reuse the bad message from the archive itself. --- lib/PublicInbox/Import.pm | 15 +++++++++++++-- lib/PublicInbox/V2Writable.pm | 6 +++--- lib/PublicInbox/WatchMaildir.pm | 14 -------------- 3 files changed, 16 insertions(+), 19 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 12df7d59..4c007b61 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -9,9 +9,10 @@ use strict; use warnings; use Fcntl qw(:flock :DEFAULT); use PublicInbox::Spawn qw(spawn); -use PublicInbox::MID qw(mid_mime mid2path); +use PublicInbox::MID qw(mids mid_mime mid2path); use PublicInbox::Address; use PublicInbox::MsgTime qw(msg_timestamp); +use PublicInbox::ContentId qw(content_digest); sub new { my ($class, $git, $name, $email, $ibx) = @_; @@ -308,7 +309,12 @@ sub add { my $path; if ($path_type eq '2/38') { - $path = mid2path(mid_mime($mime)); + my $mids = mids($mime->header_obj); + if (!scalar(@$mids)) { + my $dig = content_digest($mime); + @$mids = (digest2mid($dig)); + } + $path = mid2path($mids->[0]); } else { # v2 layout, one file: $path = 'm'; } @@ -393,6 +399,11 @@ sub atfork_child { } } +sub digest2mid ($) { + my ($dig) = @_; + $dig->clone->hexdigest . '@localhost'; +} + 1; __END__ =pod diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index fbc71c89..a305842e 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -146,19 +146,19 @@ sub num_for_harder { my $hdr = $mime->header_obj; my $dig = content_digest($mime); - $$mid0 = $dig->clone->hexdigest . '@localhost'; + $$mid0 = PublicInbox::Import::digest2mid($dig); my $num = $self->{skel}->{mm}->mid_insert($$mid0); unless (defined $num) { # it's hard to spoof the last Received: header my @recvd = $hdr->header_raw('Received'); $dig->add("Received: $_") foreach (@recvd); - $$mid0 = $dig->clone->hexdigest . '@localhost'; + $$mid0 = PublicInbox::Import::digest2mid($dig); $num = $self->{skel}->{mm}->mid_insert($$mid0); # fall back to a random Message-ID and give up determinism: until (defined($num)) { $dig->add(rand); - $$mid0 = $dig->clone->hexdigest . '@localhost'; + $$mid0 = PublicInbox::Import::digest2mid($dig); warn "using random Message-ID <$$mid0> as fallback\n"; $num = $self->{skel}->{mm}->mid_insert($$mid0); } diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index f2d3db95..3adebdde 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -12,8 +12,6 @@ use PublicInbox::Import; use PublicInbox::MDA; use PublicInbox::Spawn qw(spawn); use File::Temp qw//; -use PublicInbox::MID qw(mids); -use PublicInbox::ContentId qw(content_digest); sub new { my ($class, $config) = @_; @@ -127,7 +125,6 @@ sub _remove_spam { # path must be marked as (S)een $path =~ /:2,[A-R]*S[T-Za-z]*\z/ or return; my $mime = _path_to_mime($path) or return; - _force_mid($mime); $self->{config}->each_inbox(sub { my ($ibx) = @_; eval { @@ -146,16 +143,6 @@ sub _remove_spam { }) } -sub _force_mid { - my ($mime) = @_; - my $hdr = $mime->header_obj; - my $mids = mids($hdr); - return if @$mids; - my $dig = content_digest($mime); - my $mid = $dig->clone->hexdigest . '@localhost'; - $hdr->header_set('Message-Id', $mid); -} - sub _try_path { my ($self, $path) = @_; my @p = split(m!/+!, $path); @@ -191,7 +178,6 @@ sub _try_path { $mime = $ret; } - _force_mid($mime); $im->add($mime, $self->{spamcheck}); } -- cgit v1.2.3-24-ge0c7