about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2023-01-29 10:30:41 +0000
committer Eric Wong <e@80x24.org> 2023-01-30 06:42:31 +0000
commit e6aa13bccb7ea5d5b3246b3a944621515905e360 (patch)
tree 822309b8cc2ac85ba50acd33deebb03891b0844b /lib/PublicInbox
parent 9eb8baf199cd148b7ebf8e6e130fb832f4e1ef00 (diff)
download public-inbox-e6aa13bccb7ea5d5b3246b3a944621515905e360.tar.gz
On my x86-64 machine, OpenSSL SHA-256 is nearly twice as fast as
the Digest::SHA implementation from Perl, most likely due to an
optimized assembly implementation.  SHA-1 is a few percent
faster, too.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/ContentDigestDbg.pm 4
-rw-r--r-- lib/PublicInbox/ContentHash.pm 11
-rw-r--r-- lib/PublicInbox/Fetch.pm 4
-rw-r--r-- lib/PublicInbox/Git.pm 4
-rw-r--r-- lib/PublicInbox/LeiDedupe.pm 6
-rw-r--r-- lib/PublicInbox/LeiMirror.pm 2
-rw-r--r-- lib/PublicInbox/LeiSavedSearch.pm 4
-rw-r--r-- lib/PublicInbox/LeiSucks.pm 12
-rw-r--r-- lib/PublicInbox/Linkify.pm 2
-rw-r--r-- lib/PublicInbox/MID.pm 8
-rw-r--r-- lib/PublicInbox/MdirReader.pm 4
-rw-r--r-- lib/PublicInbox/NNTP.pm 2
-rw-r--r-- lib/PublicInbox/SHA.pm 58
-rw-r--r-- lib/PublicInbox/WwwAtomStream.pm 2
14 files changed, 91 insertions, 32 deletions
diff --git a/lib/PublicInbox/ContentDigestDbg.pm b/lib/PublicInbox/ContentDigestDbg.pm
index 425e8589..899afbbe 100644
--- a/lib/PublicInbox/ContentDigestDbg.pm
+++ b/lib/PublicInbox/ContentDigestDbg.pm
@@ -3,9 +3,9 @@
 package PublicInbox::ContentDigestDbg; # cf. PublicInbox::ContentDigest
 use v5.12;
 use Data::Dumper;
-use Digest::SHA;
+use PublicInbox::SHA;
 
-sub new { bless { dig => Digest::SHA->new(256), fh => $_[1] }, __PACKAGE__ }
+sub new { bless { dig => PublicInbox::SHA->new(256), fh => $_[1] }, __PACKAGE__ }
 
 sub add {
         $_[0]->{dig}->add($_[1]);
diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index 1afbb413..d3ff146a 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -15,7 +15,8 @@ use PublicInbox::MID qw(mids references);
 use PublicInbox::MsgIter;
 
 # not sure if less-widely supported hash families are worth bothering with
-use Digest::SHA;
+use PublicInbox::SHA; # faster, but no ->clone
+use Digest::SHA; # we still need this for ->clone
 
 sub digest_addr ($$$) {
         my ($dig, $h, $v) = @_;
@@ -93,15 +94,15 @@ sub content_digest ($;$) {
 }
 
 sub content_hash ($) {
-        content_digest($_[0])->digest;
+        content_digest($_[0], PublicInbox::SHA->new(256))->digest;
 }
 
+# don't clone the result of this
 sub git_sha ($$) {
         my ($n, $eml) = @_;
-        my $dig = Digest::SHA->new($n);
+        my $dig = PublicInbox::SHA->new($n);
         my $bref = ref($eml) eq 'SCALAR' ? $eml : \($eml->as_string);
-        $dig->add('blob '.length($$bref)."\0");
-        $dig->add($$bref);
+        $dig->add('blob '.length($$bref)."\0", $$bref);
         $dig;
 }
 
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 198e2a60..f93eeebe 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -92,9 +92,9 @@ sub do_manifest ($$$) {
 
 sub get_fingerprint2 {
         my ($git_dir) = @_;
-        require Digest::SHA;
+        require PublicInbox::SHA;
         my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir });
-        Digest::SHA::sha256(do { local $/; <$rd> });
+        PublicInbox::SHA::sha256(do { local $/; <$rd> });
 }
 
 sub writable_dir ($) {
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 3e2b435c..fd7a0382 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -20,7 +20,7 @@ use PublicInbox::Spawn qw(popen_rd which);
 use PublicInbox::Tmpfile;
 use IO::Poll qw(POLLIN);
 use Carp qw(croak carp);
-use Digest::SHA ();
+use PublicInbox::SHA ();
 use PublicInbox::DS qw(awaitpid);
 our @EXPORT_OK = qw(git_unquote git_quote);
 our $PIPE_BUFSIZ = 65536; # Linux default
@@ -630,7 +630,7 @@ sub cloneurl {
 sub manifest_entry {
         my ($self, $epoch, $default_desc) = @_;
         my $fh = $self->popen('show-ref');
-        my $dig = Digest::SHA->new(1);
+        my $dig = PublicInbox::SHA->new(1);
         while (read($fh, my $buf, 65536)) {
                 $dig->add($buf);
         }
diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm
index 32f99cd0..22864508 100644
--- a/lib/PublicInbox/LeiDedupe.pm
+++ b/lib/PublicInbox/LeiDedupe.pm
@@ -1,10 +1,10 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::LeiDedupe;
 use strict;
 use v5.10.1;
 use PublicInbox::ContentHash qw(content_hash git_sha);
-use Digest::SHA ();
+use PublicInbox::SHA ();
 
 # n.b. mutt sets most of these headers not sure about Bytes
 our @OID_IGNORE = qw(Status X-Status Content-Length Lines Bytes);
@@ -30,7 +30,7 @@ sub _oidbin ($) { defined($_[0]) ? pack('H*', $_[0]) : undef }
 
 sub smsg_hash ($) {
         my ($smsg) = @_;
-        my $dig = Digest::SHA->new(256);
+        my $dig = PublicInbox::SHA->new(256);
         my $x = join("\0", @$smsg{qw(from to cc ds subject references mid)});
         utf8::encode($x);
         $dig->add($x);
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index abf66315..31013360 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -18,7 +18,7 @@ use PublicInbox::Config;
 use PublicInbox::Inbox;
 use PublicInbox::LeiCurl;
 use PublicInbox::OnDestroy;
-use Digest::SHA qw(sha256_hex sha1_hex);
+use PublicInbox::SHA qw(sha256_hex sha1_hex);
 use POSIX qw(strftime);
 
 our $LIVE; # pid => callback
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index ed92bfd1..e5396342 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # pretends to be like LeiDedupe and also PublicInbox::Inbox
@@ -13,7 +13,7 @@ use PublicInbox::Config;
 use PublicInbox::Spawn qw(run_die);
 use PublicInbox::ContentHash qw(git_sha);
 use PublicInbox::MID qw(mids_for_index);
-use Digest::SHA qw(sha256_hex);
+use PublicInbox::SHA qw(sha256_hex);
 our $LOCAL_PFX = qr!\A(?:maildir|mh|mbox.+|mmdf|v2):!i; # TODO: put in LeiToMail?
 
 # move this to PublicInbox::Config if other things use it:
diff --git a/lib/PublicInbox/LeiSucks.pm b/lib/PublicInbox/LeiSucks.pm
index 8e866fc9..35d0a8de 100644
--- a/lib/PublicInbox/LeiSucks.pm
+++ b/lib/PublicInbox/LeiSucks.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Undocumented hidden command somebody might discover if they're
@@ -7,7 +7,7 @@
 package PublicInbox::LeiSucks;
 use strict;
 use v5.10.1;
-use Digest::SHA ();
+use PublicInbox::SHA qw(sha1_hex);
 use Config;
 use POSIX ();
 use PublicInbox::Config;
@@ -54,13 +54,13 @@ sub lei_sucks {
         } else {
                 push @out, "Xapian not available: $@\n";
         }
-        my $dig = Digest::SHA->new(1);
         push @out, "public-inbox blob OIDs of loaded features:\n";
         for my $m (grep(m{^PublicInbox/}, sort keys %INC)) {
                 my $f = $INC{$m} // next; # lazy require failed (missing dep)
-                $dig->add('blob '.(-s $f)."\0");
-                $dig->addfile($f);
-                push @out, '  '.$dig->hexdigest.' '.$m."\n";
+                open my $fh, '<', $f or do { warn "open($f): $!"; next };
+                my $hex = sha1_hex('blob '.(-s $fh)."\0".
+                                (do { local $/; <$fh> } // die("read: $!")));
+                push @out, '  '.$hex.' '.$m."\n";
         }
         push @out, <<'EOM';
 Let us know how it sucks!  Please include the above and any other
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 9fc3128f..306a57e7 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -12,7 +12,7 @@
 package PublicInbox::Linkify;
 use strict;
 use v5.10.1;
-use Digest::SHA qw/sha1_hex/;
+use PublicInbox::SHA qw(sha1_hex);
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MID qw($MID_EXTRACT);
 
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 35b517e0..4819cc25 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -1,15 +1,15 @@
-# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # Various Message-ID-related functions.
 package PublicInbox::MID;
 use strict;
-use warnings;
-use base qw/Exporter/;
+use v5.10.1; # TODO: check unicode_strings compat for v5.12
+use parent qw(Exporter);
 our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_escape MID_ESC
         mids references mids_for_index mids_in $MID_EXTRACT);
 use URI::Escape qw(uri_escape_utf8);
-use Digest::SHA qw/sha1_hex/;
+use PublicInbox::SHA qw(sha1_hex);
 require PublicInbox::Address;
 use constant {
         ID_MAX => 40, # SHA-1 hex length for HTML id anchors
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index dbb74d6d..db5f4545 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Maildirs for now, MH eventually
@@ -8,7 +8,7 @@ package PublicInbox::MdirReader;
 use strict;
 use v5.10.1;
 use PublicInbox::InboxWritable qw(eml_from_path);
-use Digest::SHA qw(sha256_hex);
+use PublicInbox::SHA qw(sha256_hex);
 
 # returns Maildir flags from a basename ('' for no flags, undef for invalid)
 sub maildir_basename_flags {
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index dd33a232..7a91e7eb 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -15,7 +15,7 @@ use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
 use PublicInbox::Eml;
 use POSIX qw(strftime);
 use PublicInbox::DS qw(now);
-use Digest::SHA qw(sha1_hex);
+use PublicInbox::SHA qw(sha1_hex);
 use Time::Local qw(timegm timelocal);
 use PublicInbox::GitAsyncCat;
 use PublicInbox::Address;
diff --git a/lib/PublicInbox/SHA.pm b/lib/PublicInbox/SHA.pm
new file mode 100644
index 00000000..da70beef
--- /dev/null
+++ b/lib/PublicInbox/SHA.pm
@@ -0,0 +1,58 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# OpenSSL exception added in commit 22711f81f4e79da6b796820e37803a05cae14645
+# (README: add OpenSSL exception, 2015-10-05)
+
+# Replaces most uses of Digest::SHA with OpenSSL via Net::SSLeay if
+# possible.  OpenSSL SHA-256 is nearly twice as fast as Digest::SHA on
+# x86-64, and SHA-1 is a bit faster as well.
+# I don't think we can implement Digest::SHA->clone with what Net::SSLeay
+# gives us...  (maybe EVP_MD_CTX_copy+EVP_MD_CTX_copy_ex need to be added
+# to Net::SSLeay?)
+package PublicInbox::SHA;
+use v5.12;
+require Exporter;
+our @EXPORT_OK = qw(sha1_hex sha256_hex sha256);
+our @ISA;
+
+BEGIN {
+        push @ISA, 'Exporter';
+        unless (eval(<<'EOM')) {
+use Net::SSLeay 1.43;
+my %SHA = (
+        1 => Net::SSLeay::EVP_get_digestbyname('sha1'),
+        256 => Net::SSLeay::EVP_get_digestbyname('sha256'),
+);
+
+sub new {
+        my ($cls, $n) = @_;
+        my $mdctx = Net::SSLeay::EVP_MD_CTX_create();
+        Net::SSLeay::EVP_DigestInit($mdctx, $SHA{$n}) or
+                        die "EVP_DigestInit $n: $!";
+        bless \$mdctx, $cls;
+}
+
+sub add {
+        my $self = shift;
+        Net::SSLeay::EVP_DigestUpdate($$self, $_) for @_;
+        $self;
+}
+
+sub digest { Net::SSLeay::EVP_DigestFinal(${$_[0]}) };
+sub hexdigest { unpack('H*', Net::SSLeay::EVP_DigestFinal(${$_[0]})) }
+sub DESTROY { Net::SSLeay::EVP_MD_CTX_destroy(${$_[0]}) };
+
+sub sha1_hex { unpack('H*', Net::SSLeay::SHA1($_[0])) };
+sub sha256_hex { unpack('H*', Net::SSLeay::SHA256($_[0])) };
+*sha256 = \&Net::SSLeay::SHA256;
+# end of eval
+EOM
+        require Digest::SHA; # stdlib fallback
+        push @ISA, 'Digest::SHA';
+        *sha1_hex = \&Digest::SHA::sha1_hex;
+        *sha256_hex = \&Digest::SHA::sha256_hex;
+        *sha256 = \&Digest::SHA::sha256;
+}
+
+} # /BEGIN
+1;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 83a8818e..737cc6cb 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -8,7 +8,7 @@ use strict;
 use parent 'PublicInbox::GzipFilter';
 
 use POSIX qw(strftime);
-use Digest::SHA qw(sha1_hex);
+use PublicInbox::SHA qw(sha1_hex);
 use PublicInbox::Address;
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MsgTime qw(msg_timestamp);