* [PATCH 1/4] viewdiff: save memory by eliminating two captures
2022-09-26 10:17 [PATCH 0/4] some minor performance tweaks Eric Wong
@ 2022-09-26 10:17 ` Eric Wong
2022-09-26 10:17 ` [PATCH 2/4] gcf2: support worktree $GIT_DIR Eric Wong
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2022-09-26 10:17 UTC (permalink / raw)
To: meta
Avoid relying on $DIGIT captures when we can use @- and @+ to access
the last match start and end, respectively. The elimination of
the post capture ought to allow the use of sv_chop to advance
the string start pointer without memory copies.
This ought to save 1-2MB of memory on my system since I've
noticed the captures were using a big chunk of scratchpad
space.
---
lib/PublicInbox/ViewDiff.pm | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 9a7adb57..95bbf2d2 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -154,16 +154,16 @@ sub diff_header ($$$) {
sub diff_before_or_after ($$) {
my ($ctx, $x) = @_;
- if (exists $ctx->{-anchors} && $$x =~ /\A(.*?) # likely "---\n" # \$1
- # diffstat lines:
- ((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
- (\x20[0-9]+\x20files?\x20)changed,
- (.*?)\z/msx) { # notes, commit message, etc
- my @x = ($4, $3, $2, $1);
- undef $$x;
+ if (exists $ctx->{-anchors} && $$x =~ # diffstat lines:
+ /((?:^\x20(?:[^\n]+?)(?:\x20+\|\x20[^\n]*\n))+)
+ (\x20[0-9]+\x20files?\x20)changed,/msx) {
+ my $pre = substr($$x, 0, $-[0]); # (likely) short prefix
+ substr($$x, 0, $+[0], ''); # sv_chop on $$x ($$x may be long)
+ my @x = ($2, $1);
my $lnk = $ctx->{-linkify};
my $zfh = $ctx->{zfh};
- print $zfh $lnk->to_html(pop @x); # $1 uninteresting prefix
+ # uninteresting prefix
+ print $zfh $lnk->to_html($pre);
for my $l (split(/^/m, pop(@x))) { # $2 per-file stat lines
$l =~ /^ (.+)( +\| .*\z)/s and
anchor0($ctx, $1, $2) and next;
@@ -173,7 +173,7 @@ sub diff_before_or_after ($$) {
print $zfh pop(@x), # $3 /^ \d+ files? /
qq(<a href="$ch">changed</a>,),
# insertions/deletions, notes, commit message, etc:
- $lnk->to_html(@x);
+ $lnk->to_html($$x);
} else {
print { $ctx->{zfh} } $ctx->{-linkify}->to_html($$x);
}
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/4] gcf2: support worktree $GIT_DIR
2022-09-26 10:17 [PATCH 0/4] some minor performance tweaks Eric Wong
2022-09-26 10:17 ` [PATCH 1/4] viewdiff: save memory by eliminating two captures Eric Wong
@ 2022-09-26 10:17 ` Eric Wong
2022-09-26 10:17 ` [PATCH 3/4] viewvcs: load blobs asynchronously Eric Wong
2022-09-26 10:17 ` [PATCH 4/4] git: reduce early bare-bones memory use Eric Wong
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2022-09-26 10:17 UTC (permalink / raw)
To: meta
We must use `git rev-parse --git-path objects' instead of
blindly appending '/objects' to $GIT_DIR, since appending
doesn't work when $GIT_DIR is a worktree.
---
lib/PublicInbox/Gcf2.pm | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 02cd0de2..2ba2efff 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -80,7 +80,8 @@ EOM
}
sub add_alt ($$) {
- my ($gcf2, $objdir) = @_;
+ my ($gcf2, $git_dir) = @_;
+ my $objdir = PublicInbox::Git->new($git_dir)->git_path('objects');
# libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
# in Debian) doesn't handle relative epochs properly when nested
@@ -120,7 +121,7 @@ sub loop (;$) {
while (<STDIN>) {
chomp;
my ($oid, $git_dir) = split(/ /, $_, 2);
- $seen{$git_dir} //= add_alt($gcf2, "$git_dir/objects");
+ $seen{$git_dir} //= add_alt($gcf2, $git_dir);
if (!$gcf2->cat_oid(1, $oid)) {
# retry once if missing. We only get unabbreviated OIDs
# from SQLite or Xapian DBs, here, so malicious clients
@@ -128,7 +129,7 @@ sub loop (;$) {
warn "I: $$ $oid missing, retrying in $git_dir\n";
$gcf2 = new();
- %seen = ($git_dir => add_alt($gcf2,"$git_dir/objects"));
+ %seen = ($git_dir => add_alt($gcf2, $git_dir));
$check_at = clock_gettime(CLOCK_MONOTONIC) + $exp;
if ($gcf2->cat_oid(1, $oid)) {
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/4] viewvcs: load blobs asynchronously
2022-09-26 10:17 [PATCH 0/4] some minor performance tweaks Eric Wong
2022-09-26 10:17 ` [PATCH 1/4] viewdiff: save memory by eliminating two captures Eric Wong
2022-09-26 10:17 ` [PATCH 2/4] gcf2: support worktree $GIT_DIR Eric Wong
@ 2022-09-26 10:17 ` Eric Wong
2022-09-26 10:17 ` [PATCH 4/4] git: reduce early bare-bones memory use Eric Wong
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2022-09-26 10:17 UTC (permalink / raw)
To: meta
This actually leads to a nice 3-5% speedup under parallel loads
when using git(1) w/o SHA-1 collision detection enabled. Gcf2
is slower since libgit2 has SHA-1 collision detection enabled
on my system.
Since we're in the area, improve location of comments w.r.t.
cgit CSS class names and note the reliance on scratchpad for
performance in a tight loop.
---
lib/PublicInbox/ViewVCS.pm | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index f740591d..915cf2c5 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -413,11 +413,21 @@ sub solve_result {
blob $oid $size bytes $raw_link</pre>
EOM
}
+ @{$ctx->{-paths}} = ($path, $raw_link);
+ $ctx->{git} = $git;
+ if ($ctx->{env}->{'pi-httpd.async'}) {
+ ibx_async_cat($ctx, $oid, \&show_blob, $ctx);
+ } else { # synchronous
+ $git->cat_async($oid, \&show_blob, $ctx);
+ $git->cat_async_wait;
+ }
+}
- my $blob = $git->cat_file($oid);
- if (!$blob) { # WTF?
+sub show_blob { # git->cat_async callback
+ my ($blob, $oid, $type, $size, $ctx) = @_;
+ if (!$blob) {
my $e = "Failed to retrieve generated blob ($oid)";
- warn "$e ($git->{git_dir})";
+ warn "$e ($ctx->{git}->{git_dir}) type=$type";
return html_page($ctx, 500, "<pre><b>$e</b></pre>".dbg_log($ctx))
}
@@ -428,6 +438,7 @@ EOM
return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]);
}
+ my ($path, $raw_link) = @{delete $ctx->{-paths}};
$bin and return html_page($ctx, 200,
"<pre>blob $oid $size bytes (binary)" .
" $raw_link</pre>".dbg_log($ctx));
@@ -445,14 +456,14 @@ EOM
$$blob = ascii_html($$blob);
}
+ # using some of the same CSS class names and ids as cgit
my $x = "<pre>blob $oid $size bytes $raw_link</pre>" .
"<hr /><table\nclass=blob>".
"<tr><td\nclass=linenumbers><pre>";
+ # scratchpad in this loop is faster here than `printf $zfh':
$x .= sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_) for (1..$nl);
$x .= '</pre></td><td><pre> </pre></td>'. # pad for non-CSS users
"<td\nclass=lines><pre\nstyle='white-space:pre'><code>";
-
- # using some of the same CSS class names and ids as cgit
html_page($ctx, 200, $x, $ctx->{-linkify}->linkify_2($$blob),
'</code></pre></td></tr></table>'.dbg_log($ctx));
}
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 4/4] git: reduce early bare-bones memory use
2022-09-26 10:17 [PATCH 0/4] some minor performance tweaks Eric Wong
` (2 preceding siblings ...)
2022-09-26 10:17 ` [PATCH 3/4] viewvcs: load blobs asynchronously Eric Wong
@ 2022-09-26 10:17 ` Eric Wong
3 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2022-09-26 10:17 UTC (permalink / raw)
To: meta
The {-git_path} cache can rely on auto-vivification, and
{alt_st} may not be needed for short-lived repos. So don't
populate those fields until they're needed, since we can
expect to handle thousands of git repos, too.
---
lib/PublicInbox/Git.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index b2ae75c8..9140caea 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -66,7 +66,7 @@ sub new {
$git_dir =~ tr!/!/!s;
$git_dir =~ s!/*\z!!s;
# may contain {-tmp} field for File::Temp::Dir
- bless { git_dir => $git_dir, alt_st => '', -git_path => {} }, $class
+ bless { git_dir => $git_dir }, $class
}
sub git_path ($$) {
@@ -90,7 +90,7 @@ sub alternates_changed {
# can't rely on 'q' on some 32-bit builds, but `d' works
my $st = pack('dd', $st[10], $st[7]); # 10: ctime, 7: size
- return 0 if $self->{alt_st} eq $st;
+ return 0 if ($self->{alt_st} // '') eq $st;
$self->{alt_st} = $st; # always a true value
}
^ permalink raw reply related [flat|nested] 5+ messages in thread