user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] lg2: disable strict hash verification
@ 2025-03-18  8:30 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2025-03-18  8:30 UTC (permalink / raw)
  To: meta

Unlike git(1), libgit2 verifies the SHA-(1|256) of objects it
reads by default.  This verification results in a large (nearly
100% w/ SHA1DC) performance penalty for us.  Since our libgit2
code only reads (and never writes) objects, just follow git(1)
and skip verification for normal reads.

This brings our libgit2-based Gcf2 batch loop performance closer
to that of `git cat-file --batch-command', as shown in the
new xt/lg2_cmp.t developer test.  However, Gcf2Client still uses
a more verbose (but more flexible) input format and the Perl
gcf2_loop still incurs normal Perl method dispatch overheads.
---
 MANIFEST               |  1 +
 lib/PublicInbox/Lg2.pm | 13 ++++++-----
 xt/lg2_cmp.t           | 51 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 xt/lg2_cmp.t

diff --git a/MANIFEST b/MANIFEST
index 321c652d..93407a46 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -670,6 +670,7 @@ xt/imapd-mbsync-oimap.t
 xt/imapd-validate.t
 xt/lei-auth-fail.t
 xt/lei-onion-convert.t
+xt/lg2_cmp.t
 xt/mem-imapd-tls.t
 xt/mem-msgview.t
 xt/mem-nntpd-tls.t
diff --git a/lib/PublicInbox/Lg2.pm b/lib/PublicInbox/Lg2.pm
index 0ee9b354..a4ea4b76 100644
--- a/lib/PublicInbox/Lg2.pm
+++ b/lib/PublicInbox/Lg2.pm
@@ -48,11 +48,14 @@ BEGIN {
 	STDOUT->autoflush(1);
 	$CFG{CCFLAGSEX} = $vals->{cflags};
 	$CFG{LIBS} = $vals->{libs};
-
-	# we use Capitalized and ALLCAPS for compatibility with old Inline::C
-	CORE::eval <<'EOM';
-use Inline C => Config => %CFG, BOOT => q[git_libgit2_init();];
-use Inline C => $c_src, BUILD_NOISY => 1;
+	my $boot = 'git_libgit2_init();';
+	eval("v$vals->{modversion}") ge v0.26 and
+		$boot .= <<EOM;
+git_libgit2_opts(GIT_OPT_ENABLE_STRICT_HASH_VERIFICATION, 0);
+EOM
+	eval <<EOM;
+use Inline C => Config => \%CFG, BOOT => \$boot;
+use Inline C => \$c_src . "/* boot: $boot */\n", BUILD_NOISY => 1;
 EOM
 	$err = $@;
 	open(STDERR, '>&', $olderr);
diff --git a/xt/lg2_cmp.t b/xt/lg2_cmp.t
new file mode 100644
index 00000000..968e390d
--- /dev/null
+++ b/xt/lg2_cmp.t
@@ -0,0 +1,51 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12;
+use Benchmark qw(:all);
+use PublicInbox::TestCommon;
+require_mods 'PublicInbox::Lg2';
+require_git v2.19;
+require PublicInbox::Gcf2Client;
+my $git_dir = $ENV{GIANT_GIT_DIR} //
+	plan skip_all => "GIANT_GIT_DIR not defined for $0";
+my $git = PublicInbox::Git->new($git_dir);
+my @cat = qw[cat-file --buffer --batch-check=%(objectname)
+	--batch-all-objects --unordered];
+my $nr = $ENV{NR} || 100;
+diag "NR=$nr";
+my $n = 0;
+my $count = sub { ++$n };
+
+my $gcf2c = PublicInbox::Gcf2Client::new();
+my $repo = " $git_dir\n";
+my ($lg2_total, $git_total);
+my $lg2_async = timeit($nr, sub {
+	my $cat = $git->popen(@cat);
+	while (<$cat>) {
+		chomp;
+		$gcf2c->gcf2_async($_.$repo, $count);
+	}
+	$cat->close or xbail "cat: $?";
+	$gcf2c->event_step while PublicInbox::Git::cat_active($gcf2c);
+	$lg2_total += $n;
+	$n = 0;
+});
+
+my $git_async = timeit($nr, sub {
+	my $cat = $git->popen(@cat);
+	while (<$cat>) {
+		chomp;
+		$git->cat_async($_, $count);
+	}
+	$cat->close or xbail "cat: $?";
+	$git->async_wait_all;
+	$git_total += $n;
+	$n = 0;
+});
+
+diag 'git '.timestr($git_async);
+diag 'lg2 '.timestr($lg2_async);
+is $lg2_total, $git_total, 'libgit2 and git saw same number of requests';
+
+done_testing;

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2025-03-18  8:30 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-03-18  8:30 [PATCH] lg2: disable strict hash verification Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).