From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0CC161FAED for ; Tue, 6 Mar 2018 08:42:43 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 11/34] content_id: no need to be human-friendly Date: Tue, 6 Mar 2018 08:42:19 +0000 Message-Id: <20180306084242.19988-12-e@80x24.org> In-Reply-To: <20180306084242.19988-1-e@80x24.org> References: <20180306084242.19988-1-e@80x24.org> List-Id: We merely use this for internal comparisons and do not store this in Xapian. So using a shorter, non-human readable digest is enough. Furthermore, introduce "content_digest" which returns the Digest::SHA object for extra changes. --- lib/PublicInbox/ContentId.pm | 15 +++++++++------ t/content_id.t | 5 +++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm index d1a009e..8347de2 100644 --- a/lib/PublicInbox/ContentId.pm +++ b/lib/PublicInbox/ContentId.pm @@ -5,7 +5,7 @@ package PublicInbox::ContentId; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/content_id/; +our @EXPORT_OK = qw/content_id content_digest/; use PublicInbox::MID qw(mids references); # not sure if less-widely supported hash families are worth bothering with @@ -14,10 +14,9 @@ use Digest::SHA; # Content-* headers are often no-ops, so maybe we don't need them my @ID_HEADERS = qw(Subject From Date To Cc); -sub content_id ($;$) { - my ($mime, $alg) = @_; - $alg ||= 256; - my $dig = Digest::SHA->new($alg); +sub content_digest ($) { + my ($mime) = @_; + my $dig = Digest::SHA->new(256); my $hdr = $mime->header_obj; # References: and In-Reply-To: get used interchangeably @@ -37,7 +36,11 @@ sub content_id ($;$) { $dig->add("$h: $_") foreach @v; } $dig->add($mime->body_raw); - 'SHA-' . $dig->algorithm . ':' . $dig->hexdigest; + $dig; +} + +sub content_id ($) { + content_digest($_[0])->digest; } 1; diff --git a/t/content_id.t b/t/content_id.t index c0ae6ec..adcdb6c 100644 --- a/t/content_id.t +++ b/t/content_id.t @@ -18,7 +18,8 @@ my $mime = Email::MIME->create( body => "hello world\n", ); -my $res = content_id($mime); -like($res, qr/\ASHA-256:[a-f0-9]{64}\z/, 'cid in format expected'); +my $orig = content_id($mime); +my $reload = content_id(Email::MIME->new($mime->as_string)); +is($orig, $reload, 'content_id matches after serialization'); done_testing(); -- EW