diff options
author | Eric Wong <e@80x24.org> | 2023-10-25 06:33:55 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-10-25 07:28:51 +0000 |
commit | 9d8a15fdb93e3a32f73169f827953c69c5ff251e (patch) | |
tree | 8f9374fe75f1cfee78a01670c6d3bd70010fc44f | |
parent | a309ca6ba5f3c0a541bb15414a4c3357a86dfbda (diff) | |
download | public-inbox-9d8a15fdb93e3a32f73169f827953c69c5ff251e.tar.gz |
While uncommon, some git repos have hundreds of thousands of refs, and slurping that output into memory can bloat the heap. Introduce a sha_all sub in PublicInbox::SHA that loops on sysread until EOF, relying on autodie to check for sysread errors.
-rw-r--r-- | lib/PublicInbox/CodeSearchIdx.pm | 7 | ||||
-rw-r--r-- | lib/PublicInbox/Fetch.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/Git.pm | 6 | ||||
-rw-r--r-- | lib/PublicInbox/LeiMirror.pm | 14 | ||||
-rw-r--r-- | lib/PublicInbox/SHA.pm | 11 |
5 files changed, 23 insertions, 19 deletions
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index e31432b9..aeee37c0 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -45,7 +45,7 @@ use POSIX qw(WNOHANG SEEK_SET); use File::Path (); use File::Spec (); use List::Util qw(max); -use PublicInbox::SHA qw(sha256_hex); +use PublicInbox::SHA qw(sha256_hex sha_all); use PublicInbox::Search qw(xap_terms); use PublicInbox::SearchIdx qw(add_val); use PublicInbox::Config qw(glob2re rel2abs_collapsed); @@ -386,10 +386,7 @@ sub fp_fini { # run_git cb my (undef, $self, $git, $prep_repo) = @_; my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}'; sysseek($refs, 0, SEEK_SET); - my $buf; - my $dig = PublicInbox::SHA->new(256); - while (sysread($refs, $buf, 65536)) { $dig->add($buf) } - $git->{-repo}->{fp} = $dig->hexdigest; + $git->{-repo}->{fp} = sha_all(256, $refs)->hexdigest; } sub ct_start ($$$) { diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 6e9b1e94..e41dd448 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -10,6 +10,7 @@ use PublicInbox::Admin; use PublicInbox::LEI; use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; +use PublicInbox::SHA qw(sha_all); use File::Temp (); sub new { bless {}, __PACKAGE__ } @@ -92,9 +93,8 @@ sub do_manifest ($$$) { sub get_fingerprint2 { my ($git_dir) = @_; - require PublicInbox::SHA; my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir }); - PublicInbox::SHA::sha256(do { local $/; <$rd> }); + sha_all(256, $rd)->digest; # ignore show-ref errors } sub writable_dir ($) { diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 476dcf30..9c26d8bf 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -23,7 +23,7 @@ use PublicInbox::ProcessIONBF; use PublicInbox::Tmpfile; use IO::Poll qw(POLLIN); use Carp qw(croak carp); -use PublicInbox::SHA (); +use PublicInbox::SHA qw(sha_all); our %HEXLEN2SHA = (40 => 1, 64 => 256); our 
%OFMT2HEXLEN = (sha1 => 40, sha256 => 64); our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN read_all); @@ -620,10 +620,8 @@ sub manifest_entry { $ent->{reference} = $buf; } } - my $dig = PublicInbox::SHA->new(1); - while (CORE::read($sr, $buf, 65536)) { $dig->add($buf) } + $ent->{fingerprint} = sha_all(1, $sr)->hexdigest; CORE::close $sr or return; # empty, uninitialized git repo - $ent->{fingerprint} = $dig->hexdigest; $ent->{modified} = modified(undef, $mod); chomp($buf = <$own> // ''); utf8::decode($buf); diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 47fb767b..43e59e6c 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -19,10 +19,10 @@ use PublicInbox::Inbox; use PublicInbox::Git qw(read_all); use PublicInbox::LeiCurl; use PublicInbox::OnDestroy; -use PublicInbox::SHA qw(sha256_hex sha1_hex); +use PublicInbox::SHA qw(sha256_hex sha_all); use POSIX qw(strftime); -use autodie qw(chdir chmod close open pipe readlink seek symlink sysopen - truncate unlink); +use autodie qw(chdir chmod close open pipe readlink + seek symlink sysopen sysseek truncate unlink); our $LIVE; # pid => callback our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]] @@ -533,10 +533,10 @@ sub fp_done { } return if !keep_going($self); my $fh = delete $self->{-show_ref} // die 'BUG: no show-ref output'; - seek($fh, SEEK_SET, 0); + sysseek($fh, SEEK_SET, 0); $self->{-ent} // die 'BUG: no -ent'; my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint'; - my $B = sha1_hex(read_all($fh)); + my $B = sha_all(1, $fh)->hexdigest; return $cb->($self, @arg) if $A ne $B; $self->{lei}->qerr("# $self->{-key} up-to-date"); } @@ -730,10 +730,10 @@ sub up_fp_done { my ($self) = @_; return if !keep_going($self); my $fh = delete $self->{-show_ref_up} // die 'BUG: no show-ref output'; - seek($fh, SEEK_SET, 0); + sysseek($fh, SEEK_SET, 0); $self->{-ent} // die 'BUG: no -ent'; my $A = $self->{-ent}->{fingerprint} // die 
'BUG: no fingerprint'; - my $B = sha1_hex(read_all($fh)); + my $B = sha_all(1, $fh)->hexdigest; return if $A eq $B; $self->{-ent}->{fingerprint} = $B; push @{$self->{chg}->{fp_mismatch}}, $self->{-key}; diff --git a/lib/PublicInbox/SHA.pm b/lib/PublicInbox/SHA.pm index 81f62618..3fa8530e 100644 --- a/lib/PublicInbox/SHA.pm +++ b/lib/PublicInbox/SHA.pm @@ -12,7 +12,8 @@ package PublicInbox::SHA; use v5.12; require Exporter; -our @EXPORT_OK = qw(sha1_hex sha256_hex sha256); +our @EXPORT_OK = qw(sha1_hex sha256_hex sha256 sha_all); +use autodie qw(sysread); our @ISA; BEGIN { @@ -55,4 +56,12 @@ EOM } } # /BEGIN + +sub sha_all ($$) { + my ($n, $fh) = @_; + my ($dig, $buf) = (PublicInbox::SHA->new($n)); + while (sysread($fh, $buf, 65536)) { $dig->add($buf) } + $dig +} + 1; |