user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
blob 65d5a76c09b43f4dec2edb08998c22ffe6d6e95e 791 bytes (raw)
name: lib/PublicInbox/ContentId.pm 	 # note: path name is non-authoritative(*)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
 
# Copyright (C) 2018 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

package PublicInbox::ContentId;
use strict;
use warnings;
use base qw/Exporter/;
our @EXPORT_OK = qw/content_id/;

# not sure if less-widely supported hash families are worth bothering with
use Digest::SHA;

# Headers whose raw values are fed to the digest, in exactly this order.
# Content-* headers are often no-ops, so maybe we don't need them
my @ID_HEADERS = qw(Subject From Date Message-ID References To Cc In-Reply-To);

# content_id($mime [, $alg])
#
# Returns a string of the form "SHA-<algorithm>:<hexdigest>" computed over
# the raw values of every header named in @ID_HEADERS (in that order, all
# occurrences of each header) followed by the raw message body.
#
#   $mime - object providing header_obj() and body_raw(); the object
#           returned by header_obj() must provide header_raw($name)
#   $alg  - algorithm selector handed to Digest::SHA->new; any false
#           value (including omission) selects 256
#
# Croaks if Digest::SHA does not hand back a digest object for $alg.
#
# NOTE(review): header values and the body are hashed back-to-back with no
# separators or header names, so distinct header splits can produce the same
# id.  Changing that would alter every generated id, so it is left as-is.
sub content_id ($;$) {
	my ($mime, $alg) = @_;
	$alg ||= 256;
	my $dig = Digest::SHA->new($alg);

	# Fail here with a meaningful message instead of letting a later
	# method call on an undefined value die far away from the real cause.
	unless ($dig) {
		require Carp;
		Carp::croak("unsupported SHA algorithm: $alg");
	}
	my $hdr = $mime->header_obj;

	foreach my $h (@ID_HEADERS) {
		# list context: a header may occur more than once
		my @v = $hdr->header_raw($h);
		$dig->add($_) foreach @v;
	}
	$dig->add($mime->body_raw);
	'SHA-' . $dig->algorithm . ':' . $dig->hexdigest;
}

1;

debug log:

solving 65d5a76 ...
found 65d5a76 in https://80x24.org/public-inbox.git

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of their contents.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).