From e6aa13bccb7ea5d5b3246b3a944621515905e360 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 29 Jan 2023 10:30:41 +0000 Subject: use Net::SSLeay (OpenSSL) for SHA-(1|256) if installed On my x86-64 machine, OpenSSL SHA-256 is nearly twice as fast as the Digest::SHA implementation from Perl, most likely due to an optimized assembly implementation. SHA-1 is a few percent faster, too. --- lib/PublicInbox/ContentDigestDbg.pm | 4 +-- lib/PublicInbox/ContentHash.pm | 11 +++---- lib/PublicInbox/Fetch.pm | 4 +-- lib/PublicInbox/Git.pm | 4 +-- lib/PublicInbox/LeiDedupe.pm | 6 ++-- lib/PublicInbox/LeiMirror.pm | 2 +- lib/PublicInbox/LeiSavedSearch.pm | 4 +-- lib/PublicInbox/LeiSucks.pm | 12 ++++---- lib/PublicInbox/Linkify.pm | 2 +- lib/PublicInbox/MID.pm | 8 ++--- lib/PublicInbox/MdirReader.pm | 4 +-- lib/PublicInbox/NNTP.pm | 2 +- lib/PublicInbox/SHA.pm | 58 +++++++++++++++++++++++++++++++++++++ lib/PublicInbox/WwwAtomStream.pm | 2 +- 14 files changed, 91 insertions(+), 32 deletions(-) create mode 100644 lib/PublicInbox/SHA.pm (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/ContentDigestDbg.pm b/lib/PublicInbox/ContentDigestDbg.pm index 425e8589..899afbbe 100644 --- a/lib/PublicInbox/ContentDigestDbg.pm +++ b/lib/PublicInbox/ContentDigestDbg.pm @@ -3,9 +3,9 @@ package PublicInbox::ContentDigestDbg; # cf. PublicInbox::ContentDigest use v5.12; use Data::Dumper; -use Digest::SHA; +use PublicInbox::SHA; -sub new { bless { dig => Digest::SHA->new(256), fh => $_[1] }, __PACKAGE__ } +sub new { bless { dig => PublicInbox::SHA->new(256), fh => $_[1] }, __PACKAGE__ } sub add { $_[0]->{dig}->add($_[1]); diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm index 1afbb413..d3ff146a 100644 --- a/lib/PublicInbox/ContentHash.pm +++ b/lib/PublicInbox/ContentHash.pm @@ -15,7 +15,8 @@ use PublicInbox::MID qw(mids references); use PublicInbox::MsgIter; # not sure if less-widely supported hash families are worth bothering with -use Digest::SHA; +use PublicInbox::SHA; # faster, but no ->clone +use Digest::SHA; # we still need this for ->clone sub digest_addr ($$$) { my ($dig, $h, $v) = @_; @@ -93,15 +94,15 @@ sub content_digest ($;$) { } sub content_hash ($) { - content_digest($_[0])->digest; + content_digest($_[0], PublicInbox::SHA->new(256))->digest; } +# don't clone the result of this sub git_sha ($$) { my ($n, $eml) = @_; - my $dig = Digest::SHA->new($n); + my $dig = PublicInbox::SHA->new($n); my $bref = ref($eml) eq 'SCALAR' ? $eml : \($eml->as_string); - $dig->add('blob '.length($$bref)."\0"); - $dig->add($$bref); + $dig->add('blob '.length($$bref)."\0", $$bref); $dig; } diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 198e2a60..f93eeebe 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -92,9 +92,9 @@ sub do_manifest ($$$) { sub get_fingerprint2 { my ($git_dir) = @_; - require Digest::SHA; + require PublicInbox::SHA; my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir }); - Digest::SHA::sha256(do { local $/; <$rd> }); + PublicInbox::SHA::sha256(do { local $/; <$rd> }); } sub writable_dir ($) { diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 3e2b435c..fd7a0382 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -20,7 +20,7 @@ use PublicInbox::Spawn qw(popen_rd which); use PublicInbox::Tmpfile; use IO::Poll qw(POLLIN); use Carp qw(croak carp); -use Digest::SHA (); +use PublicInbox::SHA (); use PublicInbox::DS qw(awaitpid); our @EXPORT_OK = qw(git_unquote git_quote); our $PIPE_BUFSIZ = 65536; # Linux default @@ -630,7 +630,7 @@ sub cloneurl { sub manifest_entry { my ($self, $epoch, $default_desc) = @_; my $fh = $self->popen('show-ref'); - my $dig = Digest::SHA->new(1); + my $dig = PublicInbox::SHA->new(1); while (read($fh, my $buf, 65536)) { $dig->add($buf); } diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm index 32f99cd0..22864508 100644 --- a/lib/PublicInbox/LeiDedupe.pm +++ b/lib/PublicInbox/LeiDedupe.pm @@ -1,10 +1,10 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ package PublicInbox::LeiDedupe; use strict; use v5.10.1; use PublicInbox::ContentHash qw(content_hash git_sha); -use Digest::SHA (); +use PublicInbox::SHA (); # n.b. mutt sets most of these headers not sure about Bytes our @OID_IGNORE = qw(Status X-Status Content-Length Lines Bytes); @@ -30,7 +30,7 @@ sub _oidbin ($) { defined($_[0]) ? pack('H*', $_[0]) : undef } sub smsg_hash ($) { my ($smsg) = @_; - my $dig = Digest::SHA->new(256); + my $dig = PublicInbox::SHA->new(256); my $x = join("\0", @$smsg{qw(from to cc ds subject references mid)}); utf8::encode($x); $dig->add($x); diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index abf66315..31013360 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -18,7 +18,7 @@ use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::LeiCurl; use PublicInbox::OnDestroy; -use Digest::SHA qw(sha256_hex sha1_hex); +use PublicInbox::SHA qw(sha256_hex sha1_hex); use POSIX qw(strftime); our $LIVE; # pid => callback diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm index ed92bfd1..e5396342 100644 --- a/lib/PublicInbox/LeiSavedSearch.pm +++ b/lib/PublicInbox/LeiSavedSearch.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # pretends to be like LeiDedupe and also PublicInbox::Inbox @@ -13,7 +13,7 @@ use PublicInbox::Config; use PublicInbox::Spawn qw(run_die); use PublicInbox::ContentHash qw(git_sha); use PublicInbox::MID qw(mids_for_index); -use Digest::SHA qw(sha256_hex); +use PublicInbox::SHA qw(sha256_hex); our $LOCAL_PFX = qr!\A(?:maildir|mh|mbox.+|mmdf|v2):!i; # TODO: put in LeiToMail? # move this to PublicInbox::Config if other things use it: diff --git a/lib/PublicInbox/LeiSucks.pm b/lib/PublicInbox/LeiSucks.pm index 8e866fc9..35d0a8de 100644 --- a/lib/PublicInbox/LeiSucks.pm +++ b/lib/PublicInbox/LeiSucks.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # Undocumented hidden command somebody might discover if they're @@ -7,7 +7,7 @@ package PublicInbox::LeiSucks; use strict; use v5.10.1; -use Digest::SHA (); +use PublicInbox::SHA qw(sha1_hex); use Config; use POSIX (); use PublicInbox::Config; @@ -54,13 +54,13 @@ sub lei_sucks { } else { push @out, "Xapian not available: $@\n"; } - my $dig = Digest::SHA->new(1); push @out, "public-inbox blob OIDs of loaded features:\n"; for my $m (grep(m{^PublicInbox/}, sort keys %INC)) { my $f = $INC{$m} // next; # lazy require failed (missing dep) - $dig->add('blob '.(-s $f)."\0"); - $dig->addfile($f); - push @out, ' '.$dig->hexdigest.' '.$m."\n"; + open my $fh, '<', $f or do { warn "open($f): $!"; next }; + my $hex = sha1_hex('blob '.(-s $fh)."\0". + (do { local $/; <$fh> } // die("read: $!"))); + push @out, ' '.$hex.' '.$m."\n"; } push @out, <<'EOM'; Let us know how it sucks! Please include the above and any other diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 9fc3128f..306a57e7 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -12,7 +12,7 @@ package PublicInbox::Linkify; use strict; use v5.10.1; -use Digest::SHA qw/sha1_hex/; +use PublicInbox::SHA qw(sha1_hex); use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MID qw($MID_EXTRACT); diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 35b517e0..4819cc25 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -1,15 +1,15 @@ -# Copyright (C) 2015-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # Various Message-ID-related functions. package PublicInbox::MID; use strict; -use warnings; -use base qw/Exporter/; +use v5.10.1; # TODO: check unicode_strings compat for v5.12 +use parent qw(Exporter); our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_escape MID_ESC mids references mids_for_index mids_in $MID_EXTRACT); use URI::Escape qw(uri_escape_utf8); -use Digest::SHA qw/sha1_hex/; +use PublicInbox::SHA qw(sha1_hex); require PublicInbox::Address; use constant { ID_MAX => 40, # SHA-1 hex length for HTML id anchors diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index dbb74d6d..db5f4545 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # Maildirs for now, MH eventually @@ -8,7 +8,7 @@ package PublicInbox::MdirReader; use strict; use v5.10.1; use PublicInbox::InboxWritable qw(eml_from_path); -use Digest::SHA qw(sha256_hex); +use PublicInbox::SHA qw(sha256_hex); # returns Maildir flags from a basename ('' for no flags, undef for invalid) sub maildir_basename_flags { diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index dd33a232..7a91e7eb 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -15,7 +15,7 @@ use PublicInbox::MID qw(mid_escape $MID_EXTRACT); use PublicInbox::Eml; use POSIX qw(strftime); use PublicInbox::DS qw(now); -use Digest::SHA qw(sha1_hex); +use PublicInbox::SHA qw(sha1_hex); use Time::Local qw(timegm timelocal); use PublicInbox::GitAsyncCat; use PublicInbox::Address; diff --git a/lib/PublicInbox/SHA.pm b/lib/PublicInbox/SHA.pm new file mode 100644 index 00000000..da70beef --- /dev/null +++ b/lib/PublicInbox/SHA.pm @@ -0,0 +1,58 @@ +# Copyright (C) all contributors +# License: AGPL-3.0+ +# OpenSSL exception added in commit 22711f81f4e79da6b796820e37803a05cae14645 +# (README: add OpenSSL exception, 2015-10-05) + +# Replaces most uses of Digest::SHA with OpenSSL via Net::SSLeay if +# possible. OpenSSL SHA-256 is nearly twice as fast as Digest::SHA on +# x86-64, and SHA-1 is a bit faster as well. +# I don't think we can implement Digest::SHA->clone with what Net::SSLeay +# gives us... (maybe EVP_MD_CTX_copy+EVP_MD_CTX_copy_ex need to be added +# to Net::SSLeay?) +package PublicInbox::SHA; +use v5.12; +require Exporter; +our @EXPORT_OK = qw(sha1_hex sha256_hex sha256); +our @ISA; + +BEGIN { + push @ISA, 'Exporter'; + unless (eval(<<'EOM')) { +use Net::SSLeay 1.43; +my %SHA = ( + 1 => Net::SSLeay::EVP_get_digestbyname('sha1'), + 256 => Net::SSLeay::EVP_get_digestbyname('sha256'), +); + +sub new { + my ($cls, $n) = @_; + my $mdctx = Net::SSLeay::EVP_MD_CTX_create(); + Net::SSLeay::EVP_DigestInit($mdctx, $SHA{$n}) or + die "EVP_DigestInit $n: $!"; + bless \$mdctx, $cls; +} + +sub add { + my $self = shift; + Net::SSLeay::EVP_DigestUpdate($$self, $_) for @_; + $self; +} + +sub digest { Net::SSLeay::EVP_DigestFinal(${$_[0]}) }; +sub hexdigest { unpack('H*', Net::SSLeay::EVP_DigestFinal(${$_[0]})) } +sub DESTROY { Net::SSLeay::EVP_MD_CTX_destroy(${$_[0]}) }; + +sub sha1_hex { unpack('H*', Net::SSLeay::SHA1($_[0])) }; +sub sha256_hex { unpack('H*', Net::SSLeay::SHA256($_[0])) }; +*sha256 = \&Net::SSLeay::SHA256; +# end of eval +EOM + require Digest::SHA; # stdlib fallback + push @ISA, 'Digest::SHA'; + *sha1_hex = \&Digest::SHA::sha1_hex; + *sha256_hex = \&Digest::SHA::sha256_hex; + *sha256 = \&Digest::SHA::sha256; +} + +} # /BEGIN +1; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 83a8818e..737cc6cb 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -8,7 +8,7 @@ use strict; use parent 'PublicInbox::GzipFilter'; use POSIX qw(strftime); -use Digest::SHA qw(sha1_hex); +use PublicInbox::SHA qw(sha1_hex); use PublicInbox::Address; use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MsgTime qw(msg_timestamp); -- cgit v1.2.3-24-ge0c7