From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8D16C1F462 for ; Fri, 14 Jun 2019 00:53:28 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] nntp: filter out duplicate Message-IDs for leafnode Date: Fri, 14 Jun 2019 00:53:28 +0000 Message-Id: <20190614005328.2474-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: It's the unfortunate reality that there are some clients which reuse Message-IDs (in which case we generate and use another) or set multiple Message-IDs on their own. While the v2 format addresses that, NNTP clients such as leafnode are not always prepared to deal with that case. So, ensure NNTP clients only see a single Message-ID, and show the others as 'X-Alt-Message-ID'. --- lib/PublicInbox/NNTP.pm | 16 ++++++++++++++++ t/nntpd.t | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index d409e78..8a31b91 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -438,6 +438,22 @@ sub set_nntp_headers ($$$$$) { # reason. We'll fake the shortest one possible. $hdr->header_set('Path', 'y'); + # leafnode (and maybe other NNTP clients) have trouble dealing + # with v2 messages which have multiple Message-IDs (either due + # to our own content-based dedupe or buggy git-send-email versions). 
+ my @mids = $hdr->header('Message-ID'); + if (scalar(@mids) > 1) { + my $mid0 = "<$mid>"; + $hdr->header_set('Message-ID', $mid0); + my @alt = $hdr->header('X-Alt-Message-ID'); + my %seen = map { $_ => 1 } (@alt, $mid0); + foreach my $m (@mids) { + next if $seen{$m}++; + push @alt, $m; + } + $hdr->header_set('X-Alt-Message-ID', @alt); + } + # clobber some my $xref = xref($self, $ng, $n, $mid); $hdr->header_set('Xref', $xref); diff --git a/t/nntpd.t b/t/nntpd.t index cce21ee..a95fb6f 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -250,6 +250,23 @@ Date: Fri, 02 Oct 1993 00:00:00 +0000 my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/; ok(scalar(grep(/$expect/, @$hdr)), 'Message-ID not folded'); ok(scalar(grep(/^Path:/, @$hdr)), 'Path: header found'); + + # it's possible for v2 messages to have 2+ Message-IDs, + # but leafnode can't handle it + if ($version != 1) { + my @mids = ("<$long_hdr>", '<2mid@wtf>'); + $for_leafnode->header_set('Message-ID', @mids); + $for_leafnode->body_set('not-a-dupe'); + my $warn = ''; + $SIG{__WARN__} = sub { $warn .= join('', @_) }; + $im->add($for_leafnode); + $im->done; + like($warn, qr/reused/, 'warned for reused MID'); + $hdr = $n->head('<2mid@wtf>'); + my @hmids = grep(/\AMessage-ID: /i, @$hdr); + is(scalar(@hmids), 1, 'Single Message-ID in header'); + like($hmids[0], qr/: <2mid\@wtf>/, 'got expected mid'); + } } # pipelined requests: -- EW