about summary refs log tree commit homepage
path: root/lib/PublicInbox/ContentId.pm
diff options
context:
space:
mode:
author: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>, 2018-02-10 05:13:19 +0000
committer: Eric Wong (Contractor, The Linux Foundation) <e@80x24.org>, 2018-02-12 17:25:13 +0000
commit: 5b491c0b308e576897cfcc270647ba4e35c6cc8a (patch)
tree: 37ae103a0931c3fb99456f5e375f69e3716d7ee5 /lib/PublicInbox/ContentId.pm
parent: d8bc86742a146f7665f0548cf855c2b2153617e0 (diff)
download: public-inbox-5b491c0b308e576897cfcc270647ba4e35c6cc8a.tar.gz
Call order will need to change a bit since this is going to be
tied to Xapian
Diffstat (limited to 'lib/PublicInbox/ContentId.pm')
-rw-r--r-- lib/PublicInbox/ContentId.pm 30
1 files changed, 30 insertions, 0 deletions
diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm
new file mode 100644
index 00000000..65d5a76c
--- /dev/null
+++ b/lib/PublicInbox/ContentId.pm
@@ -0,0 +1,30 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Provides content_id(), which callers may import on request: it is listed
+# in @EXPORT_OK, so nothing is exported by default.
+package PublicInbox::ContentId;
+use strict;
+use warnings;
+use base qw/Exporter/;
+our @EXPORT_OK = qw/content_id/;
+
+# not sure if less-widely supported hash families are worth bothering with
+use Digest::SHA;
+
+# Headers whose raw values content_id() feeds into the digest, in this
+# order.  The values are hashed sequentially, so reordering this list
+# changes the resulting IDs.
+# Content-* headers are often no-ops, so maybe we don't need them
+my @ID_HEADERS = qw(Subject From Date Message-ID References To Cc In-Reply-To);
+
+# Computes a content identifier for an email message.
+#
+# $mime - message object; this sub calls header_obj and body_raw on it, and
+#         header_raw($name) on whatever header_obj returns.
+#         NOTE(review): presumably an Email::MIME-like object -- confirm.
+# $alg  - optional Digest::SHA algorithm selector; any false value
+#         (undef, 0, '') selects 256.
+#
+# Returns the string "SHA-<algorithm>:<hex digest>".  The digest covers
+# every raw value of each header in @ID_HEADERS (in that order), followed
+# by the raw body.
+# Dies (via Carp::croak) if Digest::SHA does not recognize $alg.
+#
+# NOTE(review): header values are hashed back-to-back, without the header
+# name or any separator between them, so different header sets can produce
+# identical digest input.  Adding a delimiter would change every ID this
+# sub produces, so that is deliberately left as-is here.
+sub content_id ($;$) {
+        my ($mime, $alg) = @_;
+        $alg ||= 256;
+
+        # Digest::SHA->new yields no object for an algorithm it does not
+        # recognize; report that to the caller instead of failing below
+        # with "Can't call method "add" on an undefined value".
+        my $dig = Digest::SHA->new($alg);
+        unless (defined $dig) {
+                require Carp;
+                Carp::croak("unsupported SHA algorithm: $alg");
+        }
+
+        my $hdr = $mime->header_obj;
+        foreach my $h (@ID_HEADERS) {
+                # header_raw is called in list context, so every value it
+                # returns for this header is hashed
+                $dig->add($_) foreach $hdr->header_raw($h);
+        }
+        $dig->add($mime->body_raw);
+
+        return 'SHA-' . $dig->algorithm . ':' . $dig->hexdigest;
+}
+
+1;