From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 7A27B1F451 for ; Thu, 11 Jan 2024 22:36:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1705012568; bh=SXn+kxSe/fiaWLlcBmlZayVIrpvPNn0tbbOl5Ovs9fE=; h=Date:From:To:Subject:From; b=zNE+F4iixV89xeejxR3U6GmFZOt1bv3oMIDQ9zsU9bgl6OEksQWQuY3oXUS+r8yjK dMI81T6K2uHBezGDeu24sLPwLh524OkZ0RWKRgwLOnkRP8Z4dZhbp7mLNa3H1lLy1k f9v1LRuSPdQgCT2PFH6gGl/U0OVswjtaqmI9EwvE= Date: Thu, 11 Jan 2024 22:36:08 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [abandoned/WIP]: use libgit2 to parse configs Message-ID: <20240111223608.M380280@dcvr> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline List-Id: Publishing for documentation purposes This was intended to avoid vfork+execve overhead, but error handling seems wonky and giving libgit2 a bad config file results in something like: perl: symbol lookup error: */Gcf2_*.so: undefined symbol: kkiterr_last Which I'm very confused by :x And the tests which do pass don't seem much faster (if at all). --- lib/PublicInbox/Config.pm | 35 +++++++++++++++------- lib/PublicInbox/Gcf2.pm | 8 +++++ lib/PublicInbox/gcf2_libgit2.h | 55 ++++++++++++++++++++++++++++++++-- t/gcf2.t | 37 +++++++++++++++++++++-- 4 files changed, 119 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 6bebf790..244dcd1d 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -16,6 +16,7 @@ use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd run_qx); our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup our $DEDUPE; # set to {} to dedupe or clear cache +use autodie qw(open seek); sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] } @@ -182,17 +183,29 @@ sub git_config_dump { $file = undef if !-e $file; # XXX should we set {-f} if !-e $file? return bless {}, $class if (!@opt_c && !defined($file)); - my %env; - my $opt = { 2 => $lei->{2} // 2 }; - if (@opt_c) { - unshift(@opt_c, '-c', "include.path=$file") if defined($file); - tmp_cmd_opt(\%env, $opt); - } - my @cmd = ('git', @opt_c, qw(config -z -l --includes)); - push(@cmd, '-f', $file) if !@opt_c && defined($file); - my $fh = popen_rd(\@cmd, \%env, $opt); - my $rv = config_fh_parse($fh, "\0", "\n"); - $fh->close or die "@cmd failed: \$?=$?\n"; + my $rv; + if (eval { require PublicInbox::Gcf2 }) { + open my $fh, '+>', undef; + PublicInbox::Lg2Cfg->new($file)->lz($fh) if defined $file; + for (@{$lei->{opt}->{c} // []}) { + print $fh join("\n", split /=/, $_, 2), "\0"; + } + $fh->flush or die "flush: $!"; + seek($fh, 0, 0); + $rv = config_fh_parse($fh, "\0", "\n"); + } else { + my %env; + my $opt = { 2 => $lei->{2} // 2 }; + if (@opt_c) { + unshift(@opt_c, '-c',"include.path=$file") if defined($file); + tmp_cmd_opt(\%env, $opt); + } + my @cmd = ('git', @opt_c, qw(config -z -l --includes)); + push(@cmd, '-f', $file) if !@opt_c && defined($file); + my $fh = popen_rd(\@cmd, \%env, $opt); + $rv = config_fh_parse($fh, "\0", "\n"); + $fh->close or die "@cmd failed: \$?=$?\n"; + } $rv->{-opt_c} = \@opt_c if @opt_c; # for ->urlmatch $rv->{-f} = $file; bless $rv, $class; diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm index 78392990..b96011e8 100644 --- a/lib/PublicInbox/Gcf2.pm +++ b/lib/PublicInbox/Gcf2.pm @@ -131,4 +131,12 @@ sub loop (;$) { } } +package PublicInbox::Lg2Cfg; +use v5.12; + +no warnings 'once'; +*new = \&PublicInbox::Gcf2::cfg_new; +*DESTROY = \&PublicInbox::Gcf2::cfg_DESTROY; +*lz = \&PublicInbox::Gcf2::cfg_lz; + 1; diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h index e1f0ef39..33bc0542 100644 --- a/lib/PublicInbox/gcf2_libgit2.h +++ b/lib/PublicInbox/gcf2_libgit2.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 all contributors + * Copyright (C) all contributors * License: AGPL-3.0+ * * libgit2 for Inline::C @@ -14,7 +14,7 @@ static void croak_if_err(int rc, const char *msg) { if (rc != GIT_OK) { - const git_error *e = giterr_last(); + const git_error *e = kkiterr_last(); croak("%d %s (%s)", rc, msg, e ? e->message : "unknown"); } @@ -140,3 +140,54 @@ int cat_oid(SV *self, int fd, SV *oidsv) return rc == GIT_OK; } + +SV *cfg_new(SV *ignore, const char *f) +{ + SV *ref, *self; + git_config *cfg; + int rc = git_config_open_ondisk(&cfg, f); + + croak_if_err(rc, "git_config_open_ondisk"); + + ref = newSViv((IV)cfg); + self = newRV_noinc(ref); + sv_bless(self, gv_stashpv("PublicInbox::Lg2Cfg", GV_ADD)); + SvREADONLY_on(ref); + + return self; +} + +static git_config *cfg_ptr(SV *self) +{ + return (git_config *)SvIV(SvRV(self)); +} + +void cfg_DESTROY(SV *self) +{ + git_config_free(cfg_ptr(self)); +} + +/* git_config_foreach cb */ +static int cfg_each_i(const git_config_entry *ent, void *ptr) +{ + PerlIO *io = ptr; + + PerlIO_write(io, ent->name, strlen(ent->name)); + if (ent->value) { + PerlIO_write(io, "\n", 1); + PerlIO_write(io, ent->value, strlen(ent->value)); + } + PerlIO_write(io, "\0", 1); + + return 0; +} + +/* `git config -lz --includes' */ +void cfg_lz(SV *self, PerlIO *io) +{ + int rc; + git_config *cfg = cfg_ptr(self); + + rc = git_config_foreach(cfg, cfg_each_i, io); + croak_if_err(rc, "git_config_foreach"); +} diff --git a/t/gcf2.t b/t/gcf2.t index 33f3bbca..ad43832a 100644 --- a/t/gcf2.t +++ b/t/gcf2.t @@ -1,9 +1,9 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ -use strict; +use v5.12; +use autodie; use PublicInbox::TestCommon; -use Test::More; use Fcntl qw(:seek); use IO::Handle (); use POSIX qw(_exit); @@ -14,6 +14,37 @@ use PublicInbox::Syscall qw($F_SETPIPE_SZ); use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); +{ + open my $fh, '>', "$tmpdir/inc"; + print $fh "[a]\nb = c\n"; + close $fh; + open $fh, '>', "$tmpdir/top"; + print $fh "[include]\npath = $tmpdir/inc\n"; + close $fh; + open $fh, '>', \(my $buf); + my $cfg = PublicInbox::Lg2Cfg->new("$tmpdir/top"); + $cfg->lz($fh); + close $fh; + my @lg2 = split /^/sm, $buf; + my @git = xqx([qw(git config --includes -lz -f), "$tmpdir/top"]); + is_xdeeply(\@lg2, \@git, 'git(1) and our dumper matches'); + my $nr = $ENV{TEST_CFG_LEAK_NR} // 0; + for (0..$nr) { + $cfg = PublicInbox::Lg2Cfg->new("$tmpdir/top"); + open $fh, '>', '/dev/null'; + $cfg->lz($fh); + } + open $fh, '+>>', "$tmpdir/top"; + print $fh "\0include"; + close $fh; + open $fh, '>', undef; + $cfg = PublicInbox::Lg2Cfg->new("$tmpdir/top"); # WTF symbol lookup error + warn "cfg=$cfg"; + $cfg->lz($fh); + $fh->flush; + diag("size=".(-s $fh)); +} + my $gcf2 = PublicInbox::Gcf2::new(); is(ref($gcf2), 'PublicInbox::Gcf2', '::new works'); my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3';