From 00ee23808c651b7d9a9b1210871b91dd69ce4153 Mon Sep 17 00:00:00 2001
From: "Eric Wong (Contractor, The Linux Foundation)"
Date: Fri, 2 Mar 2018 09:38:35 +0000
Subject: mid: add `mids' and `references' methods for extraction

We'll be using a more consistent API for extracting Message-IDs
from various headers.
---
 lib/PublicInbox/MID.pm | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 2c9822f4..786c056f 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -6,7 +6,8 @@ package PublicInbox::MID;
 use strict;
 use warnings;
 use base qw/Exporter/;
-our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC/;
+our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
+	mids references/;
 use URI::Escape qw(uri_escape_utf8);
 use Digest::SHA qw/sha1_hex/;
 use constant MID_MAX => 40; # SHA-1 hex length
@@ -48,6 +49,27 @@ sub mid2path {
 
 sub mid_mime ($) { $_[0]->header_obj->header_raw('Message-ID') }
 
+sub uniq_mids {
+	my ($hdr, @fields) = @_;
+	my %seen;
+	my @raw;
+	foreach my $f (@fields) {
+		push @raw, $hdr->header_raw($f);
+	}
+	my @mids = (join(' ', @raw) =~ /<([^>]+)>/g);
+	my $mids = scalar(@mids) == 0 ? \@raw: \@mids;
+	my @ret;
+	foreach (@$mids) {
+		next if $seen{$_};
+		push @ret, $_;
+		$seen{$_} = 1;
+	}
+	\@ret;
+}
+
+sub mids { uniq_mids($_[0], 'Message-Id') }
+sub references { uniq_mids($_[0], 'References', 'In-Reply-To') }
+
 # RFC3986, section 3.3:
 sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@' }
 sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) }
-- 
cgit v1.2.3-24-ge0c7