From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C90901FAEA for ; Tue, 6 Mar 2018 08:42:42 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 08/34] mid: add `mids' and `references' methods for extraction Date: Tue, 6 Mar 2018 08:42:16 +0000 Message-Id: <20180306084242.19988-9-e@80x24.org> In-Reply-To: <20180306084242.19988-1-e@80x24.org> References: <20180306084242.19988-1-e@80x24.org> List-Id: We'll be using a more consistent API for extracting Message-IDs from various headers. --- lib/PublicInbox/MID.pm | 24 +++++++++++++++++++++++- t/mid.t | 22 +++++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 2c9822f..786c056 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -6,7 +6,8 @@ package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC/; +our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC + mids references/; use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; use constant MID_MAX => 40; # SHA-1 hex length @@ -48,6 +49,27 @@ sub mid2path { sub mid_mime ($) { $_[0]->header_obj->header_raw('Message-ID') } +sub uniq_mids { + my ($hdr, @fields) = @_; + my %seen; + my @raw; + foreach my $f (@fields) { + push @raw, $hdr->header_raw($f); + } + my @mids = (join(' ', @raw) =~ /<([^>]+)>/g); + my $mids = scalar(@mids) == 0 ? 
\@raw: \@mids; + my @ret; + foreach (@$mids) { + next if $seen{$_}; + push @ret, $_; + $seen{$_} = 1; + } + \@ret; +} + +sub mids { uniq_mids($_[0], 'Message-Id') } +sub references { uniq_mids($_[0], 'References', 'In-Reply-To') } + # RFC3986, section 3.3: sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@' } sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) } diff --git a/t/mid.t b/t/mid.t index 0bf3331..223be79 100644 --- a/t/mid.t +++ b/t/mid.t @@ -1,11 +1,31 @@ # Copyright (C) 2016-2018 all contributors # License: AGPL-3.0+ use Test::More; -use PublicInbox::MID qw(mid_escape); +use PublicInbox::MID qw(mid_escape mids references); is(mid_escape('foo!@(bar)'), 'foo!@(bar)'); is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); +{ + use Email::MIME; + my $mime = Email::MIME->create; + $mime->header_set('Message-Id', '<mid-1@a>'); + is_deeply(['mid-1@a'], mids($mime->header_obj), 'mids in common case'); + $mime->header_set('Message-Id', '<mid-1@a>', '<mid-2@b>'); + is_deeply(['mid-1@a', 'mid-2@b'], mids($mime->header_obj), '2 mids'); + $mime->header_set('Message-Id', '<mid-1@a>', '<mid-1@a>'); + is_deeply(['mid-1@a'], mids($mime->header_obj), 'dup mids'); + $mime->header_set('Message-Id', '<mid-1@a> comment'); + is_deeply(['mid-1@a'], mids($mime->header_obj), 'comment ignored'); + $mime->header_set('Message-Id', 'bare-mid'); + is_deeply(['bare-mid'], mids($mime->header_obj), 'bare mid OK'); + + $mime->header_set('References', '<hello> <world>'); + $mime->header_set('In-Reply-To', '<weld>'); + is_deeply(['hello', 'world', 'weld'], references($mime->header_obj), + 'references combines with In-Reply-To'); +} + done_testing(); 1; -- EW