From dfed6cc6f2881c77478174dd5eb9b93352b1f1c1 Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)"
Date: Mon, 19 Mar 2018 23:24:50 +0000
Subject: content_id: do not take Message-Id into account

If we need to use content_id, we've already lost hope in
relying on Message-Id as a differentiator.

This prevents duplicates from showing up repeatedly with -watch
when Message-Ids are reused and we generate new Message-Ids
to disambiguate.
---
 lib/PublicInbox/ContentId.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/PublicInbox/ContentId.pm')

diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm
index 9082b769..279eec0c 100644
--- a/lib/PublicInbox/ContentId.pm
+++ b/lib/PublicInbox/ContentId.pm
@@ -21,7 +21,8 @@ sub content_digest ($) {
 	# in SearchIdx, so treat them the same for this:
 	my %seen;
 	foreach my $mid (@{mids($hdr)}) {
-		$dig->add('mid: '.$mid);
+		# do NOT consider the Message-ID as part of the content_id
+		# if we got here, we've already got Message-ID reuse
 		$seen{$mid} = 1;
 	}
 	foreach my $mid (@{references($hdr)}) {
-- 
cgit v1.2.3-24-ge0c7