user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 4/5] lei_mirror: handle UTF-8 from manifest.js.gz properly
  @ 2023-03-13 12:00  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-03-13 12:00 UTC (permalink / raw)
  To: meta

This should ensure we display the "git config gitweb.owner
$OWNER" command invocation properly and also set the
description properly without triggering wide character warnings.

Also tested with a smallish iproute2 repo
(/pub/scm/linux/kernel/git/toke/iproute2.git) using my mirror:

  public-inbox-clone --remote-manifest=pub/manifest.js.gz \
    --include='*/toke/iproute2.git' --inbox-config=never \
    https://80x24.org/lore $DST

Anyways, I'm fairly certain this change and its tests are
correct, but I still struggle to understand Perl's approach to
Unicode and its interactions with various JSON implementations.

Fixes: 0830817c132cb105 ("lei_mirror: show non-ASCII owner properly w/ --verbose")
---
 lib/PublicInbox/LeiMirror.pm | 6 +++---
 t/clone-coderepo.t           | 8 ++++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 3ec8170f..18932cf4 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -259,8 +259,7 @@ sub run_reap {
 sub start_cmd {
 	my ($self, $cmd, $opt, $fini) = @_;
 	do_reap($self);
-	utf8::decode(my $msg = "# @$cmd");
-	$self->{lei}->qerr($msg);
+	$self->{lei}->qerr("# @$cmd");
 	return if $self->{dry_run};
 	$LIVE->{spawn($cmd, undef, $opt)} = [ \&reap_cmd, $self, $cmd, $fini ]
 }
@@ -633,7 +632,7 @@ sub clone_v1 {
 	}
 
 	my $d = $self->{-ent} ? $self->{-ent}->{description} : undef;
-	$self->{'txt.description'} = $d if defined $d;
+	utf8::encode($self->{'txt.description'} = $d) if defined $d;
 	(!defined($d) && !$end) and
 		_get_txt_start($self, 'description', $fini);
 
@@ -823,6 +822,7 @@ sub update_ent {
 	$new = $self->{-ent}->{owner} // return;
 	$cur = $self->{-local_manifest}->{$key}->{owner} // "\0";
 	return if $cur eq $new;
+	utf8::encode($new); # to octets
 	my $cmd = [ qw(git config -f), "$dst/config", 'gitweb.owner', $new ];
 	start_cmd($self, $cmd, { 2 => $self->{lei}->{2} });
 }
diff --git a/t/clone-coderepo.t b/t/clone-coderepo.t
index 1f33a6d7..3a5997c9 100644
--- a/t/clone-coderepo.t
+++ b/t/clone-coderepo.t
@@ -63,11 +63,13 @@ EOM
 	my $env = { TEST_DOCROOT => "$tmpdir/src", PI_CONFIG => $pi_config };
 	$td = start_script($cmd, $env, { 3 => $tcp });
 	my $fp = sha1_hex(my $refs = xqx([@git, 'show-ref']));
+	my $alice = "\x{100}lice";
 	$m = {
 		'/a.git' => {
 			fingerprint => $fp,
 			modified => 1,
-			owner => 'Alice',
+			owner => $alice,
+			description => "${alice}'s repo",
 		},
 		'/b.git' => {
 			fingerprint => $fp,
@@ -89,9 +91,11 @@ my $cmd = [qw(-clone --inbox-config=never --manifest= --project-list=
 	--objstore= -p -q), $url, "$tmpdir/dst", '--exit-code'];
 ok(run_script($cmd), 'clone');
 is(xqx([qw(git config gitweb.owner)], { GIT_DIR => "$tmpdir/dst/a.git" }),
-	"Alice\n", 'a.git gitweb.owner set');
+	"\xc4\x80lice\n", 'a.git gitweb.owner set');
 is(xqx([qw(git config gitweb.owner)], { GIT_DIR => "$tmpdir/dst/b.git" }),
 	"Bob\n", 'b.git gitweb.owner set');
+my $desc = PublicInbox::Git::try_cat("$tmpdir/dst/a.git/description");
+is($desc, "\xc4\x80lice's repo\n", 'description set');
 
 my $dst_pl = "$tmpdir/dst/projects.list";
 my $dst_mf = "$tmpdir/dst/manifest.js.gz";

^ permalink raw reply related	[relevance 5%]

* [PATCH] lei_mirror: show non-ASCII owner properly w/ --verbose
@ 2023-02-09 12:30  7% Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2023-02-09 12:30 UTC (permalink / raw)
  To: meta

This makes the verbose progress output look nicer, but doesn't
affect the actual config file generation.
---
 lib/PublicInbox/LeiMirror.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index f7184240..afe12bb3 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -259,7 +259,8 @@ sub run_reap {
 sub start_cmd {
 	my ($self, $cmd, $opt, $fini) = @_;
 	do_reap($self);
-	$self->{lei}->qerr("# @$cmd");
+	utf8::decode(my $msg = "# @$cmd");
+	$self->{lei}->qerr($msg);
 	return if $self->{dry_run};
 	$LIVE->{spawn($cmd, undef, $opt)} = [ \&reap_cmd, $self, $cmd, $fini ]
 }

^ permalink raw reply related	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2023-02-09 12:30  7% [PATCH] lei_mirror: show non-ASCII owner properly w/ --verbose Eric Wong
2023-03-13 12:00     [PATCH 0/5] clone improvements Eric Wong
2023-03-13 12:00  5% ` [PATCH 4/5] lei_mirror: handle UTF-8 from manifest.js.gz properly Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).