git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [PATCH] t: use user-specific utf-8 locale for testing
@ 2021-06-02 11:46 Đoàn Trần Công Danh
  2021-06-02 19:56 ` Taylor Blau
                   ` (6 more replies)
  0 siblings, 7 replies; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-02 11:46 UTC (permalink / raw)
  To: git; +Cc: Đoàn Trần Công Danh

In some test-cases, utf-8 locale is required. To find such locale,
we're using the first available UTF-8 locale that returned by
"locale -a".

Despite being required by POSIX, locale(1) is unavailable in some
systems, e.g. Linux with musl libc.  Some of those systems support
utf-8 locale out of the box.

However, without "locale -a", we can't guess provided UTF-8 locale.

Let's give users of those systems an option to have better test
coverage.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 Makefile                                 |  6 ++++++
 t/lib-git-svn.sh                         | 15 +++++++++------
 t/t9100-git-svn-basic.sh                 | 14 +++-----------
 t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
 t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
 5 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/Makefile b/Makefile
index c3565fc0f8..4b2c24e5ea 100644
--- a/Makefile
+++ b/Makefile
@@ -398,6 +398,9 @@ all::
 # with a different indexfile format version.  If it isn't set the index
 # file format used is index-v[23].
 #
+# Define GIT_TEST_UTF8_LOCALE to prefered utf-8 locale for testing.
+# If it isn't set, use the first utf-8 locale returned by "locale -a".
+#
 # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
 #
 # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
@@ -2801,6 +2804,9 @@ ifdef GIT_TEST_CMP
 endif
 ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
 	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
+endif
+ifdef GIT_TEST_UTF8_LOCALE
+	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
 endif
 	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
 ifdef GIT_PERF_REPEAT_COUNT
diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
index 547eb3c31a..df319593f7 100644
--- a/t/lib-git-svn.sh
+++ b/t/lib-git-svn.sh
@@ -121,12 +121,15 @@ start_svnserve () {
 		 --listen-host 127.0.0.1 &
 }
 
-prepare_a_utf8_locale () {
-	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
-	p
-	q
-}')
-	if test -n "$a_utf8_locale"
+prepare_utf8_locale () {
+	if test -z "$GIT_TEST_UTF8_LOCALE"
+	then
+		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
+		p
+		q
+	}')
+	fi
+	if test -n "$GIT_TEST_UTF8_LOCALE"
 	then
 		test_set_prereq UTF8
 	else
diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
index 1d3fdcc997..d5563ec35f 100755
--- a/t/t9100-git-svn-basic.sh
+++ b/t/t9100-git-svn-basic.sh
@@ -4,21 +4,13 @@
 #
 
 test_description='git svn basic tests'
-GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
 
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
 . ./lib-git-svn.sh
 
-case "$GIT_SVN_LC_ALL" in
-*.UTF-8)
-	test_set_prereq UTF8
-	;;
-*)
-	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
-	;;
-esac
+prepare_utf8_locale
 
 test_expect_success 'git svn --version works anywhere' '
 	nongit git svn --version
@@ -187,8 +179,8 @@ test_expect_success POSIXPERM,SYMLINKS "$name" '
 	test ! -h "$SVN_TREE"/exec-2.sh &&
 	test_cmp help "$SVN_TREE"/exec-2.sh'
 
-name="commit with UTF-8 message: locale: $GIT_SVN_LC_ALL"
-LC_ALL="$GIT_SVN_LC_ALL"
+name="commit with UTF-8 message: locale: $GIT_TEST_UTF8_LOCALE"
+LC_ALL="$GIT_TEST_UTF8_LOCALE"
 export LC_ALL
 # This test relies on the previous test, hence requires POSIXPERM,SYMLINKS
 test_expect_success UTF8,POSIXPERM,SYMLINKS "$name" "
diff --git a/t/t9115-git-svn-dcommit-funky-renames.sh b/t/t9115-git-svn-dcommit-funky-renames.sh
index 9b44a44bc1..743fbe1fe4 100755
--- a/t/t9115-git-svn-dcommit-funky-renames.sh
+++ b/t/t9115-git-svn-dcommit-funky-renames.sh
@@ -93,9 +93,9 @@ test_expect_success 'git svn rebase works inside a fresh-cloned repository' '
 # > ... All of the above characters, except for the backslash, are converted
 # > to special UNICODE characters in the range 0xf000 to 0xf0ff (the
 # > "Private use area") when creating or accessing files.
-prepare_a_utf8_locale
+prepare_utf8_locale
 test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new file on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	neq=$(printf "\201\202") &&
 	git config svn.pathnameencoding cp932 &&
@@ -107,7 +107,7 @@ test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new
 
 # See the comment on the above test for setting of LC_ALL.
 test_expect_success !MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 rename on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	inf=$(printf "\201\207") &&
 	git config svn.pathnameencoding cp932 &&
diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh
index 2c213ae654..01e1e8a8f7 100755
--- a/t/t9129-git-svn-i18n-commitencoding.sh
+++ b/t/t9129-git-svn-i18n-commitencoding.sh
@@ -14,12 +14,12 @@ compare_git_head_with () {
 	test_cmp current "$1"
 }
 
-prepare_a_utf8_locale
+prepare_utf8_locale
 
 compare_svn_head_with () {
 	# extract just the log message and strip out committer info.
 	# don't use --limit here since svn 1.1.x doesn't have it,
-	LC_ALL="$a_utf8_locale" svn log $(git svn info --url) | perl -w -e '
+	LC_ALL="$GIT_TEST_UTF8_LOCALE" svn log $(git svn info --url) | perl -w -e '
 		use bytes;
 		$/ = ("-"x72) . "\n";
 		my @x = <STDIN>;
-- 
2.32.0.rc2.1.gd85b21334f


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] t: use user-specific utf-8 locale for testing
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
@ 2021-06-02 19:56 ` Taylor Blau
  2021-06-08 10:49   ` Ævar Arnfjörð Bjarmason
  2021-06-03 19:27 ` Jeff King
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Taylor Blau @ 2021-06-02 19:56 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git

On Wed, Jun 02, 2021 at 06:46:46PM +0700, Đoàn Trần Công Danh wrote:
> Despite being required by POSIX, locale(1) is unavailable in some
> systems, e.g. Linux with musl libc.  Some of those systems support
> utf-8 locale out of the box.

Hmmph. I would have imagined that locale was available everywhere, but
unfortunately not.

> diff --git a/Makefile b/Makefile
> index c3565fc0f8..4b2c24e5ea 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -398,6 +398,9 @@ all::
>  # with a different indexfile format version.  If it isn't set the index
>  # file format used is index-v[23].
>  #
> +# Define GIT_TEST_UTF8_LOCALE to prefered utf-8 locale for testing.
> +# If it isn't set, use the first utf-8 locale returned by "locale -a".

s/prefered/preferred

> +#
>  # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
>  #
>  # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
> @@ -2801,6 +2804,9 @@ ifdef GIT_TEST_CMP
>  endif
>  ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
>  	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
> +endif
> +ifdef GIT_TEST_UTF8_LOCALE
> +	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
>  endif
>  	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
>  ifdef GIT_PERF_REPEAT_COUNT
> diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
> index 547eb3c31a..df319593f7 100644
> --- a/t/lib-git-svn.sh
> +++ b/t/lib-git-svn.sh
> @@ -121,12 +121,15 @@ start_svnserve () {
>  		 --listen-host 127.0.0.1 &
>  }
>
> -prepare_a_utf8_locale () {
> -	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
> -	p
> -	q
> -}')
> -	if test -n "$a_utf8_locale"
> +prepare_utf8_locale () {
> +	if test -z "$GIT_TEST_UTF8_LOCALE"
> +	then
> +		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
> +		p
> +		q
> +	}')
> +	fi

OK, so we bind GIT_TEST_UTF8_LOCALE to the value of $a_utf8_locale in
the pre-image, unless the user said otherwise.

> +	if test -n "$GIT_TEST_UTF8_LOCALE"

...Then we go on to handle things like before, except we read from
"$GIT_TEST_UTF8_LOCALE" instead of "$a_utf8_locale". Makes sense to me.

>  	then
>  		test_set_prereq UTF8
>  	else
> diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
> index 1d3fdcc997..d5563ec35f 100755
> --- a/t/t9100-git-svn-basic.sh
> +++ b/t/t9100-git-svn-basic.sh
> @@ -4,21 +4,13 @@
>  #
>
>  test_description='git svn basic tests'
> -GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
>
>  GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
>  export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
>
>  . ./lib-git-svn.sh
>
> -case "$GIT_SVN_LC_ALL" in
> -*.UTF-8)
> -	test_set_prereq UTF8
> -	;;
> -*)
> -	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
> -	;;
> -esac
> +prepare_utf8_locale

This change (and the omitted ones below in later hunks) looks like it
isn't changing any behavior (it is just running the same code behind the
prepare_utf8_locale function instead of inlining it).

They all look right to me, but it may be helpful to either point this out
in the commit message or prepare them separately. I'd probably err on
the side of the former.

That said, this patch looks good to me with minor touch-ups (my only
nits are the above and the spelling mistake in the Makefile).

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] t: use user-specific utf-8 locale for testing
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
  2021-06-02 19:56 ` Taylor Blau
@ 2021-06-03 19:27 ` Jeff King
  2021-06-04  3:32 ` Bagas Sanjaya
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Jeff King @ 2021-06-03 19:27 UTC (permalink / raw)
  To: Đoàn Trần Công Danh; +Cc: git

On Wed, Jun 02, 2021 at 06:46:46PM +0700, Đoàn Trần Công Danh wrote:

> In some test-cases, utf-8 locale is required. To find such locale,
> we're using the first available UTF-8 locale that returned by
> "locale -a".
> 
> Despite being required by POSIX, locale(1) is unavailable in some
> systems, e.g. Linux with musl libc.  Some of those systems support
> utf-8 locale out of the box.
> 
> However, without "locale -a", we can't guess provided UTF-8 locale.
> 
> Let's give users of those systems an option to have better test
> coverage.

That makes sense. At first I thought this was fixing a test breakage,
and wondered if we should simply be skipping the tests. But after
reading more carefully, we already are doing that, and this is just
allowing those tests to run in more places (you even said so in the last
sentence above, but I missed it, so I am calling it out).

I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
that people don't even have to set this extra Makefile knob. But I'm not
sure if we have a good way of testing if that locale works (if we can't
find the "locale" binary). So this is probably a reasonable middle
ground.

> ---
>  Makefile                                 |  6 ++++++
>  t/lib-git-svn.sh                         | 15 +++++++++------
>  t/t9100-git-svn-basic.sh                 | 14 +++-----------
>  t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
>  t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
>  5 files changed, 23 insertions(+), 22 deletions(-)

I think we test musl in our ci builds; should we do something like:

diff --git a/ci/lib.sh b/ci/lib.sh
index d848c036c5..ec67398c18 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -229,6 +229,7 @@ linux-musl)
 	CC=gcc
 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
+	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=something.UTF-8?"
 	;;
 esac
 

here, too (I don't know what the correct "something" is here; maybe
"C")?

-Peff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] t: use user-specific utf-8 locale for testing
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
  2021-06-02 19:56 ` Taylor Blau
  2021-06-03 19:27 ` Jeff King
@ 2021-06-04  3:32 ` Bagas Sanjaya
  2021-06-04  5:20   ` Đoàn Trần Công Danh
  2021-06-06 16:33 ` [PATCH v2] " Đoàn Trần Công Danh
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Bagas Sanjaya @ 2021-06-04  3:32 UTC (permalink / raw)
  To: Đoàn Trần Công Danh, git

Hi Đoàn,

On 02/06/21 18.46, Đoàn Trần Công Danh wrote:
> In some test-cases, utf-8 locale is required. To find such locale,
> we're using the first available UTF-8 locale that returned by
> "locale -a".
> 

On most systems, the first available such locale is C.UTF-8, so why 
don't we just use it?

> Despite being required by POSIX, locale(1) is unavailable in some
> systems, e.g. Linux with musl libc.  Some of those systems support
> utf-8 locale out of the box.
> 
> However, without "locale -a", we can't guess provided UTF-8 locale.
> 
> Let's give users of those systems an option to have better test
> coverage.
> 

So can we assume a UTF-8 locale as the default for testing on systems
without locale(1)?

> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>   Makefile                                 |  6 ++++++
>   t/lib-git-svn.sh                         | 15 +++++++++------
>   t/t9100-git-svn-basic.sh                 | 14 +++-----------
>   t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
>   t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
>   5 files changed, 23 insertions(+), 22 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index c3565fc0f8..4b2c24e5ea 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -398,6 +398,9 @@ all::
>   # with a different indexfile format version.  If it isn't set the index
>   # file format used is index-v[23].
>   #
> +# Define GIT_TEST_UTF8_LOCALE to prefered utf-8 locale for testing.
> +# If it isn't set, use the first utf-8 locale returned by "locale -a".
> +#

Better say "... to prefer utf-8 locale for testing".

-- 
An old man doll... just what I always wanted! - Clara

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] t: use user-specific utf-8 locale for testing
  2021-06-04  3:32 ` Bagas Sanjaya
@ 2021-06-04  5:20   ` Đoàn Trần Công Danh
  0 siblings, 0 replies; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-04  5:20 UTC (permalink / raw)
  To: Bagas Sanjaya; +Cc: git

On 2021-06-04 10:32:04+0700, Bagas Sanjaya <bagasdotme@gmail.com> wrote:
> Hi Đoàn,
> 
> On 02/06/21 18.46, Đoàn Trần Công Danh wrote:
> > In some test-cases, utf-8 locale is required. To find such locale,
> > we're using the first available UTF-8 locale that returned by
> > "locale -a".
> > 
> 
> On most systems, the first available such locale is C.UTF-8, so why don't we
> just use it?

I don't know. I think on Linux with glibc, C.UTF-8 must be enabled
explicitly, and one has to run "localedef"?
Worse, systems have the right not to have any UTF-8 locales.
Someone may enable only C and en_US.ISO-8859-1.

> > Despite being required by POSIX, locale(1) is unavailable in some
> > systems, e.g. Linux with musl libc.  Some of those systems support
> > utf-8 locale out of the box.
> > 
> > However, without "locale -a", we can't guess provided UTF-8 locale.
> > 
> > Let's give users of those systems an option to have better test
> > coverage.
> > 
> 
> So can we assume to UTF-8 locale as default for testing on systems without
> locale(1)?

I don't know, either. POSIX only requires 2 locales, C and POSIX.
All other locales are optional:

	Conforming implementations shall support one or more coded
	character sets. 

I don't know if such systems exist or not.

C and POSIX treat any bytes higher than 0x80 as arbitrary characters [1].

	The POSIX locale shall contain 256 single-byte characters
	including the characters

> > Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> > ---
> >   Makefile                                 |  6 ++++++
> >   t/lib-git-svn.sh                         | 15 +++++++++------
> >   t/t9100-git-svn-basic.sh                 | 14 +++-----------
> >   t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
> >   t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
> >   5 files changed, 23 insertions(+), 22 deletions(-)
> > 
> > diff --git a/Makefile b/Makefile
> > index c3565fc0f8..4b2c24e5ea 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -398,6 +398,9 @@ all::
> >   # with a different indexfile format version.  If it isn't set the index
> >   # file format used is index-v[23].
> >   #
> > +# Define GIT_TEST_UTF8_LOCALE to prefered utf-8 locale for testing.
> > +# If it isn't set, use the first utf-8 locale returned by "locale -a".
> > +#
> 
> Better say "... to prefer utf-8 locale for testing".

I intended to say:

	... to *the* preferred ...

We need a utf-8 locale to test anyway ;) So, we don't prefer a utf-8 locale,
we pick one utf-8 locale from a handful of utf-8 locales.


1: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap06.html#tag_06_02

-- 
Danh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v2] t: use user-specific utf-8 locale for testing
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
                   ` (2 preceding siblings ...)
  2021-06-04  3:32 ` Bagas Sanjaya
@ 2021-06-06 16:33 ` Đoàn Trần Công Danh
  2021-06-06 20:06   ` Torsten Bögershausen
  2021-06-07  0:48 ` [PATCH v3] t: use pre-defined utf-8 locale for testing svn Đoàn Trần Công Danh
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-06 16:33 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Taylor Blau, Jeff King,
	Bagas Sanjaya

In some test-cases, utf-8 locale is required. To find such locale,
we're using the first available UTF-8 locale that returned by
"locale -a".

Despite being required by POSIX, locale(1) is unavailable in some
systems, e.g. Linux with musl libc.  Some of those systems support
utf-8 locale out of the box.

However, without "locale -a", we can't guess provided UTF-8 locale.

Let's give users of those systems an option to have better test
coverage.

This change also rename t/lib-git-svn.sh:prepare_a_utf8_locale to
prepare_utf8_locale, since we no longer prepare the variable named
"a_utf8_locale" but set up a fallback value for GIT_TEST_UTF8_LOCALE
instead.  The fallback will be LC_ALL, LANG environment variable,
or the first utf-8 locale from output of "locale -a", in that order.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
Range-diff against v1:
1:  d242ce64c4 ! 1:  f299ae2239 t: use user-specific utf-8 locale for testing
    @@ Commit message
         Let's give users of those systems an option to have better test
         coverage.
     
    +    This change also rename t/lib-git-svn.sh:prepare_a_utf8_locale to
    +    prepare_utf8_locale, since we no longer prepare the variable named
    +    "a_utf8_locale" but set up a fallback value for GIT_TEST_UTF8_LOCALE
    +    instead.  The fallback will be LC_ALL, LANG environment variable,
    +    or the first utf-8 locale from output of "locale -a", in that order.
    +
         Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Makefile ##
    @@ Makefile: all::
      # with a different indexfile format version.  If it isn't set the index
      # file format used is index-v[23].
      #
    -+# Define GIT_TEST_UTF8_LOCALE to prefered utf-8 locale for testing.
    -+# If it isn't set, use the first utf-8 locale returned by "locale -a".
    ++# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
    ++# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
    ++# locale returned by "locale -a".
     +#
      # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
      #
    @@ Makefile: ifdef GIT_TEST_CMP
      	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
      ifdef GIT_PERF_REPEAT_COUNT
     
    + ## ci/lib.sh ##
    +@@ ci/lib.sh: linux-musl)
    + 	CC=gcc
    + 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
    + 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
    ++	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
    + 	;;
    + esac
    + 
    +
      ## t/lib-git-svn.sh ##
     @@ t/lib-git-svn.sh: start_svnserve () {
      		 --listen-host 127.0.0.1 &
    @@ t/lib-git-svn.sh: start_svnserve () {
     -}')
     -	if test -n "$a_utf8_locale"
     +prepare_utf8_locale () {
    -+	if test -z "$GIT_TEST_UTF8_LOCALE"
    ++	if test -n "$GIT_TEST_UTF8_LOCALE"
    ++	then
    ++		: test_set_prereq UTF8
    ++	elif test -n "${LC_ALL:-$LANG}"
     +	then
    ++		case "${LC_ALL:-$LANG}" in
    ++		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
    ++			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
    ++			;;
    ++		esac
    ++	else
     +		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
     +		p
     +		q

 Makefile                                 |  7 +++++++
 ci/lib.sh                                |  1 +
 t/lib-git-svn.sh                         | 24 ++++++++++++++++++------
 t/t9100-git-svn-basic.sh                 | 14 +++-----------
 t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
 t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
 6 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/Makefile b/Makefile
index c3565fc0f8..502e0c9a81 100644
--- a/Makefile
+++ b/Makefile
@@ -398,6 +398,10 @@ all::
 # with a different indexfile format version.  If it isn't set the index
 # file format used is index-v[23].
 #
+# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
+# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
+# locale returned by "locale -a".
+#
 # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
 #
 # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
@@ -2801,6 +2805,9 @@ ifdef GIT_TEST_CMP
 endif
 ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
 	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
+endif
+ifdef GIT_TEST_UTF8_LOCALE
+	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
 endif
 	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
 ifdef GIT_PERF_REPEAT_COUNT
diff --git a/ci/lib.sh b/ci/lib.sh
index d848c036c5..476c3f369f 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -229,6 +229,7 @@ linux-musl)
 	CC=gcc
 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
+	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
 	;;
 esac
 
diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
index 547eb3c31a..83efc17661 100644
--- a/t/lib-git-svn.sh
+++ b/t/lib-git-svn.sh
@@ -121,12 +121,24 @@ start_svnserve () {
 		 --listen-host 127.0.0.1 &
 }
 
-prepare_a_utf8_locale () {
-	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
-	p
-	q
-}')
-	if test -n "$a_utf8_locale"
+prepare_utf8_locale () {
+	if test -n "$GIT_TEST_UTF8_LOCALE"
+	then
+		: test_set_prereq UTF8
+	elif test -n "${LC_ALL:-$LANG}"
+	then
+		case "${LC_ALL:-$LANG}" in
+		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
+			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
+			;;
+		esac
+	else
+		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
+		p
+		q
+	}')
+	fi
+	if test -n "$GIT_TEST_UTF8_LOCALE"
 	then
 		test_set_prereq UTF8
 	else
diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
index 1d3fdcc997..d5563ec35f 100755
--- a/t/t9100-git-svn-basic.sh
+++ b/t/t9100-git-svn-basic.sh
@@ -4,21 +4,13 @@
 #
 
 test_description='git svn basic tests'
-GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
 
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
 . ./lib-git-svn.sh
 
-case "$GIT_SVN_LC_ALL" in
-*.UTF-8)
-	test_set_prereq UTF8
-	;;
-*)
-	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
-	;;
-esac
+prepare_utf8_locale
 
 test_expect_success 'git svn --version works anywhere' '
 	nongit git svn --version
@@ -187,8 +179,8 @@ test_expect_success POSIXPERM,SYMLINKS "$name" '
 	test ! -h "$SVN_TREE"/exec-2.sh &&
 	test_cmp help "$SVN_TREE"/exec-2.sh'
 
-name="commit with UTF-8 message: locale: $GIT_SVN_LC_ALL"
-LC_ALL="$GIT_SVN_LC_ALL"
+name="commit with UTF-8 message: locale: $GIT_TEST_UTF8_LOCALE"
+LC_ALL="$GIT_TEST_UTF8_LOCALE"
 export LC_ALL
 # This test relies on the previous test, hence requires POSIXPERM,SYMLINKS
 test_expect_success UTF8,POSIXPERM,SYMLINKS "$name" "
diff --git a/t/t9115-git-svn-dcommit-funky-renames.sh b/t/t9115-git-svn-dcommit-funky-renames.sh
index 9b44a44bc1..743fbe1fe4 100755
--- a/t/t9115-git-svn-dcommit-funky-renames.sh
+++ b/t/t9115-git-svn-dcommit-funky-renames.sh
@@ -93,9 +93,9 @@ test_expect_success 'git svn rebase works inside a fresh-cloned repository' '
 # > ... All of the above characters, except for the backslash, are converted
 # > to special UNICODE characters in the range 0xf000 to 0xf0ff (the
 # > "Private use area") when creating or accessing files.
-prepare_a_utf8_locale
+prepare_utf8_locale
 test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new file on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	neq=$(printf "\201\202") &&
 	git config svn.pathnameencoding cp932 &&
@@ -107,7 +107,7 @@ test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new
 
 # See the comment on the above test for setting of LC_ALL.
 test_expect_success !MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 rename on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	inf=$(printf "\201\207") &&
 	git config svn.pathnameencoding cp932 &&
diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh
index 2c213ae654..01e1e8a8f7 100755
--- a/t/t9129-git-svn-i18n-commitencoding.sh
+++ b/t/t9129-git-svn-i18n-commitencoding.sh
@@ -14,12 +14,12 @@ compare_git_head_with () {
 	test_cmp current "$1"
 }
 
-prepare_a_utf8_locale
+prepare_utf8_locale
 
 compare_svn_head_with () {
 	# extract just the log message and strip out committer info.
 	# don't use --limit here since svn 1.1.x doesn't have it,
-	LC_ALL="$a_utf8_locale" svn log $(git svn info --url) | perl -w -e '
+	LC_ALL="$GIT_TEST_UTF8_LOCALE" svn log $(git svn info --url) | perl -w -e '
 		use bytes;
 		$/ = ("-"x72) . "\n";
 		my @x = <STDIN>;
-- 
2.32.0.rc3.5.gf3d78db977


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2] t: use user-specific utf-8 locale for testing
  2021-06-06 16:33 ` [PATCH v2] " Đoàn Trần Công Danh
@ 2021-06-06 20:06   ` Torsten Bögershausen
  2021-06-07  0:20     ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Torsten Bögershausen @ 2021-06-06 20:06 UTC (permalink / raw)
  To: Đoàn Trần Công Danh
  Cc: git, Taylor Blau, Jeff King, Bagas Sanjaya

This all looks good.
Some suggestions about the commit message are inline.

On Sun, Jun 06, 2021 at 11:33:16PM +0700, Đoàn Trần Công Danh wrote:
> In some test-cases, utf-8 locale is required. To find such locale,
> we're using the first available UTF-8 locale that returned by
> "locale -a".

Good explanation.
I think that in general "utf-8" as a specification/specifier is better
written as "UTF-8", with uppercase.
"utf-8" or utf8 may be used inside the code, depending on the language.

>
> Despite being required by POSIX, locale(1) is unavailable in some
> systems, e.g. Linux with musl libc.  Some of those systems support
> utf-8 locale out of the box.
This reads a little bit harsh (the first sentence) and it is not
fully clear which systems do what (the second sentence).
Or are 2 things mentioned - the locale(1) utility and the support
of one UTF-8 locale "out of the box" ?
Does Linux with musl libc support a UTF-8 locale, but not
the locale(1) utility ?
Git itself supports many systems that are not POSIX compliant,
strictly speaking. But the functions defined in POSIX
are used whenever available.

Could we write:
However, the locale(1) utility is unavailable on some systems,
e.g. Linux with musl libc.

>
> However, without "locale -a", we can't guess provided UTF-8 locale.
>
> Let's give users of those systems an option to have better test
> coverage.

Add a Makefile knob GIT_TEST_UTF8_LOCALE and activate it for linux-musl

>
> This change also rename t/lib-git-svn.sh:prepare_a_utf8_locale to
> prepare_utf8_locale, since we no longer prepare the variable named
> "a_utf8_locale" but set up a fallback value for GIT_TEST_UTF8_LOCALE
> instead.  The fallback will be LC_ALL, LANG environment variable,
> or the first utf-8 locale from output of "locale -a", in that order.

rename -> renames, maybe drop "This change", like this ?
Rename t/lib-git-svn.sh:prepare_a_utf8_locale into prepare_utf8_locale,
since we no longer prepare the variable named "a_utf8_locale",
but set up a fallback value for GIT_TEST_UTF8_LOCALE instead.
The fallback will be LC_ALL, LANG environment variable,
or the first UTF-8 locale from output of "locale -a", in that order.


>
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
> Range-diff against v1:
> 1:  d242ce64c4 ! 1:  f299ae2239 t: use user-specific utf-8 locale for testing
>     @@ Commit message
>          Let's give users of those systems an option to have better test
>          coverage.
>
>     +    This change also rename t/lib-git-svn.sh:prepare_a_utf8_locale to
>     +    prepare_utf8_locale, since we no longer prepare the variable named
>     +    "a_utf8_locale" but set up a fallback value for GIT_TEST_UTF8_LOCALE
>     +    instead.  The fallback will be LC_ALL, LANG environment variable,
>     +    or the first utf-8 locale from output of "locale -a", in that order.
>     +
>          Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
>
>       ## Makefile ##
>     @@ Makefile: all::
>       # with a different indexfile format version.  If it isn't set the index
>       # file format used is index-v[23].
>       #
>     -+# Define GIT_TEST_UTF8_LOCALE to prefered utf-8 locale for testing.
>     -+# If it isn't set, use the first utf-8 locale returned by "locale -a".
>     ++# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
>     ++# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
>     ++# locale returned by "locale -a".
>      +#
>       # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
>       #
>     @@ Makefile: ifdef GIT_TEST_CMP
>       	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
>       ifdef GIT_PERF_REPEAT_COUNT
>
>     + ## ci/lib.sh ##
>     +@@ ci/lib.sh: linux-musl)
>     + 	CC=gcc
>     + 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
>     + 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
>     ++	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
>     + 	;;
>     + esac
>     +
>     +
>       ## t/lib-git-svn.sh ##
>      @@ t/lib-git-svn.sh: start_svnserve () {
>       		 --listen-host 127.0.0.1 &
>     @@ t/lib-git-svn.sh: start_svnserve () {
>      -}')
>      -	if test -n "$a_utf8_locale"
>      +prepare_utf8_locale () {
>     -+	if test -z "$GIT_TEST_UTF8_LOCALE"
>     ++	if test -n "$GIT_TEST_UTF8_LOCALE"
>     ++	then
>     ++		: test_set_prereq UTF8
>     ++	elif test -n "${LC_ALL:-$LANG}"
>      +	then
>     ++		case "${LC_ALL:-$LANG}" in
>     ++		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
>     ++			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
>     ++			;;
>     ++		esac
>     ++	else
>      +		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
>      +		p
>      +		q
>
>  Makefile                                 |  7 +++++++
>  ci/lib.sh                                |  1 +
>  t/lib-git-svn.sh                         | 24 ++++++++++++++++++------
>  t/t9100-git-svn-basic.sh                 | 14 +++-----------
>  t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
>  t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
>  6 files changed, 34 insertions(+), 22 deletions(-)
>
> diff --git a/Makefile b/Makefile
> index c3565fc0f8..502e0c9a81 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -398,6 +398,10 @@ all::
>  # with a different indexfile format version.  If it isn't set the index
>  # file format used is index-v[23].
>  #
> +# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
> +# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
> +# locale returned by "locale -a".
> +#
>  # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
>  #
>  # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
> @@ -2801,6 +2805,9 @@ ifdef GIT_TEST_CMP
>  endif
>  ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
>  	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
> +endif
> +ifdef GIT_TEST_UTF8_LOCALE
> +	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
>  endif
>  	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
>  ifdef GIT_PERF_REPEAT_COUNT
> diff --git a/ci/lib.sh b/ci/lib.sh
> index d848c036c5..476c3f369f 100755
> --- a/ci/lib.sh
> +++ b/ci/lib.sh
> @@ -229,6 +229,7 @@ linux-musl)
>  	CC=gcc
>  	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
>  	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
> +	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
>  	;;
>  esac
>
> diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
> index 547eb3c31a..83efc17661 100644
> --- a/t/lib-git-svn.sh
> +++ b/t/lib-git-svn.sh
> @@ -121,12 +121,24 @@ start_svnserve () {
>  		 --listen-host 127.0.0.1 &
>  }
>
> -prepare_a_utf8_locale () {
> -	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
> -	p
> -	q
> -}')
> -	if test -n "$a_utf8_locale"
> +prepare_utf8_locale () {
> +	if test -n "$GIT_TEST_UTF8_LOCALE"
> +	then
> +		: test_set_prereq UTF8
> +	elif test -n "${LC_ALL:-$LANG}"
> +	then
> +		case "${LC_ALL:-$LANG}" in
> +		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
> +			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
> +			;;
> +		esac
> +	else
> +		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
> +		p
> +		q
> +	}')
> +	fi
> +	if test -n "$GIT_TEST_UTF8_LOCALE"
>  	then
>  		test_set_prereq UTF8
>  	else
> diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
> index 1d3fdcc997..d5563ec35f 100755
> --- a/t/t9100-git-svn-basic.sh
> +++ b/t/t9100-git-svn-basic.sh
> @@ -4,21 +4,13 @@
>  #
>
>  test_description='git svn basic tests'
> -GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
>
>  GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
>  export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
>
>  . ./lib-git-svn.sh
>
> -case "$GIT_SVN_LC_ALL" in
> -*.UTF-8)
> -	test_set_prereq UTF8
> -	;;
> -*)
> -	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
> -	;;
> -esac
> +prepare_utf8_locale
>
>  test_expect_success 'git svn --version works anywhere' '
>  	nongit git svn --version
> @@ -187,8 +179,8 @@ test_expect_success POSIXPERM,SYMLINKS "$name" '
>  	test ! -h "$SVN_TREE"/exec-2.sh &&
>  	test_cmp help "$SVN_TREE"/exec-2.sh'
>
> -name="commit with UTF-8 message: locale: $GIT_SVN_LC_ALL"
> -LC_ALL="$GIT_SVN_LC_ALL"
> +name="commit with UTF-8 message: locale: $GIT_TEST_UTF8_LOCALE"
> +LC_ALL="$GIT_TEST_UTF8_LOCALE"
>  export LC_ALL
>  # This test relies on the previous test, hence requires POSIXPERM,SYMLINKS
>  test_expect_success UTF8,POSIXPERM,SYMLINKS "$name" "
> diff --git a/t/t9115-git-svn-dcommit-funky-renames.sh b/t/t9115-git-svn-dcommit-funky-renames.sh
> index 9b44a44bc1..743fbe1fe4 100755
> --- a/t/t9115-git-svn-dcommit-funky-renames.sh
> +++ b/t/t9115-git-svn-dcommit-funky-renames.sh
> @@ -93,9 +93,9 @@ test_expect_success 'git svn rebase works inside a fresh-cloned repository' '
>  # > ... All of the above characters, except for the backslash, are converted
>  # > to special UNICODE characters in the range 0xf000 to 0xf0ff (the
>  # > "Private use area") when creating or accessing files.
> -prepare_a_utf8_locale
> +prepare_utf8_locale
>  test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new file on dcommit' '
> -	LC_ALL=$a_utf8_locale &&
> +	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
>  	export LC_ALL &&
>  	neq=$(printf "\201\202") &&
>  	git config svn.pathnameencoding cp932 &&
> @@ -107,7 +107,7 @@ test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new
>
>  # See the comment on the above test for setting of LC_ALL.
>  test_expect_success !MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 rename on dcommit' '
> -	LC_ALL=$a_utf8_locale &&
> +	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
>  	export LC_ALL &&
>  	inf=$(printf "\201\207") &&
>  	git config svn.pathnameencoding cp932 &&
> diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh
> index 2c213ae654..01e1e8a8f7 100755
> --- a/t/t9129-git-svn-i18n-commitencoding.sh
> +++ b/t/t9129-git-svn-i18n-commitencoding.sh
> @@ -14,12 +14,12 @@ compare_git_head_with () {
>  	test_cmp current "$1"
>  }
>
> -prepare_a_utf8_locale
> +prepare_utf8_locale
>
>  compare_svn_head_with () {
>  	# extract just the log message and strip out committer info.
>  	# don't use --limit here since svn 1.1.x doesn't have it,
> -	LC_ALL="$a_utf8_locale" svn log $(git svn info --url) | perl -w -e '
> +	LC_ALL="$GIT_TEST_UTF8_LOCALE" svn log $(git svn info --url) | perl -w -e '
>  		use bytes;
>  		$/ = ("-"x72) . "\n";
>  		my @x = <STDIN>;
> --
> 2.32.0.rc3.5.gf3d78db977
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v2] t: use user-specific utf-8 locale for testing
  2021-06-06 20:06   ` Torsten Bögershausen
@ 2021-06-07  0:20     ` Junio C Hamano
  0 siblings, 0 replies; 19+ messages in thread
From: Junio C Hamano @ 2021-06-07  0:20 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Đoàn Trần Công Danh, git, Taylor Blau,
	Jeff King, Bagas Sanjaya

Torsten Bögershausen <tboegi@web.de> writes:

> This all looks good.
> Some suggestions about the commit message are inline.

All good comments.

I have another one on the title.

> Subject: Re: [PATCH v2] t: use user-specific utf-8 locale for testing

If we auto-detect to customize which locale to use per user, that is
already user-specific; letting the user explicitly tell us which
locale to use is "use user-specified UTF-8 locale".

This affects only SVN tests and the change is in lib-git-svn, so
when we test other parts of the system for UTF-8, this change will
not directly help, right?  Perhaps we want to mention SVN somewhere
on the title?  Perhaps "for testing git-svn"?

Thanks.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v3] t: use pre-defined utf-8 locale for testing svn
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
                   ` (3 preceding siblings ...)
  2021-06-06 16:33 ` [PATCH v2] " Đoàn Trần Công Danh
@ 2021-06-07  0:48 ` Đoàn Trần Công Danh
  2021-06-07  1:01   ` Junio C Hamano
  2021-06-07  1:08 ` [PATCH v4] t: use user-specified " Đoàn Trần Công Danh
  2021-06-08  6:56 ` [PATCH v5] " Đoàn Trần Công Danh
  6 siblings, 1 reply; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-07  0:48 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Taylor Blau, Jeff King,
	Bagas Sanjaya, Torsten Bögershausen, Junio C Hamano

In some test-cases, UTF-8 locale is required. To find such locale,
we're using the first available UTF-8 locale that returned by
"locale -a".

However, the locale(1) utility is unavailable on some systems,
e.g. Linux with musl libc.

However, without "locale -a", we can't guess provided UTF-8 locale.

Add a Makefile knob GIT_TEST_UTF8_LOCALE and activate it for
linux-musl in our CI system.

Rename t/lib-git-svn.sh:prepare_a_utf8_locale to prepare_utf8_locale,
since we no longer prepare the variable named "a_utf8_locale",
but set up a fallback value for GIT_TEST_UTF8_LOCALE instead.
The fallback will be LC_ALL, LANG environment variable,
or the first UTF-8 locale from output of "locale -a", in that order.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---

Jeff King wrote:

> I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
> that people don't even have to set this extra Makefile knob. But I'm not
> sure if we have a good way of testing if that locale works (if we can't
> find the "locale" binary).

I also think we should fall back to "C.UTF-8" instead of skipping those
tests.  However, I don't know if there are any systems that do not have
the "C.UTF-8" locale.


Range-diff against v2:
1:  f299ae2239 ! 1:  064cfc0fe3 t: use user-specific utf-8 locale for testing
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    t: use user-specific utf-8 locale for testing
    +    t: use pre-defined utf-8 locale for testing svn
     
    -    In some test-cases, utf-8 locale is required. To find such locale,
    +    In some test-cases, UTF-8 locale is required. To find such locale,
         we're using the first available UTF-8 locale that returned by
         "locale -a".
     
    -    Despite being required by POSIX, locale(1) is unavailable in some
    -    systems, e.g. Linux with musl libc.  Some of those systems support
    -    utf-8 locale out of the box.
    +    However, the locale(1) utility is unavailable on some systems,
    +    e.g. Linux with musl libc.
     
         However, without "locale -a", we can't guess provided UTF-8 locale.
     
    -    Let's give users of those systems an option to have better test
    -    coverage.
    +    Add a Makefile knob GIT_TEST_UTF8_LOCALE and activate it for
    +    linux-musl in our CI system.
     
    -    This change also rename t/lib-git-svn.sh:prepare_a_utf8_locale to
    -    prepare_utf8_locale, since we no longer prepare the variable named
    -    "a_utf8_locale" but set up a fallback value for GIT_TEST_UTF8_LOCALE
    -    instead.  The fallback will be LC_ALL, LANG environment variable,
    -    or the first utf-8 locale from output of "locale -a", in that order.
    +    Rename t/lib-git-svn.sh:prepare_a_utf8_locale to prepare_utf8_locale,
    +    since we no longer prepare the variable named "a_utf8_locale",
    +    but set up a fallback value for GIT_TEST_UTF8_LOCALE instead.
    +    The fallback will be LC_ALL, LANG environment variable,
    +    or the first UTF-8 locale from output of "locale -a", in that order.
     
         Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     

 Makefile                                 |  7 +++++++
 ci/lib.sh                                |  1 +
 t/lib-git-svn.sh                         | 24 ++++++++++++++++++------
 t/t9100-git-svn-basic.sh                 | 14 +++-----------
 t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
 t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
 6 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/Makefile b/Makefile
index c3565fc0f8..502e0c9a81 100644
--- a/Makefile
+++ b/Makefile
@@ -398,6 +398,10 @@ all::
 # with a different indexfile format version.  If it isn't set the index
 # file format used is index-v[23].
 #
+# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
+# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
+# locale returned by "locale -a".
+#
 # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
 #
 # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
@@ -2801,6 +2805,9 @@ ifdef GIT_TEST_CMP
 endif
 ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
 	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
+endif
+ifdef GIT_TEST_UTF8_LOCALE
+	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
 endif
 	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
 ifdef GIT_PERF_REPEAT_COUNT
diff --git a/ci/lib.sh b/ci/lib.sh
index d848c036c5..476c3f369f 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -229,6 +229,7 @@ linux-musl)
 	CC=gcc
 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
+	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
 	;;
 esac
 
diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
index 547eb3c31a..83efc17661 100644
--- a/t/lib-git-svn.sh
+++ b/t/lib-git-svn.sh
@@ -121,12 +121,24 @@ start_svnserve () {
 		 --listen-host 127.0.0.1 &
 }
 
-prepare_a_utf8_locale () {
-	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
-	p
-	q
-}')
-	if test -n "$a_utf8_locale"
+prepare_utf8_locale () {
+	if test -n "$GIT_TEST_UTF8_LOCALE"
+	then
+		: test_set_prereq UTF8
+	elif test -n "${LC_ALL:-$LANG}"
+	then
+		case "${LC_ALL:-$LANG}" in
+		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
+			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
+			;;
+		esac
+	else
+		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
+		p
+		q
+	}')
+	fi
+	if test -n "$GIT_TEST_UTF8_LOCALE"
 	then
 		test_set_prereq UTF8
 	else
diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
index 1d3fdcc997..d5563ec35f 100755
--- a/t/t9100-git-svn-basic.sh
+++ b/t/t9100-git-svn-basic.sh
@@ -4,21 +4,13 @@
 #
 
 test_description='git svn basic tests'
-GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
 
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
 . ./lib-git-svn.sh
 
-case "$GIT_SVN_LC_ALL" in
-*.UTF-8)
-	test_set_prereq UTF8
-	;;
-*)
-	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
-	;;
-esac
+prepare_utf8_locale
 
 test_expect_success 'git svn --version works anywhere' '
 	nongit git svn --version
@@ -187,8 +179,8 @@ test_expect_success POSIXPERM,SYMLINKS "$name" '
 	test ! -h "$SVN_TREE"/exec-2.sh &&
 	test_cmp help "$SVN_TREE"/exec-2.sh'
 
-name="commit with UTF-8 message: locale: $GIT_SVN_LC_ALL"
-LC_ALL="$GIT_SVN_LC_ALL"
+name="commit with UTF-8 message: locale: $GIT_TEST_UTF8_LOCALE"
+LC_ALL="$GIT_TEST_UTF8_LOCALE"
 export LC_ALL
 # This test relies on the previous test, hence requires POSIXPERM,SYMLINKS
 test_expect_success UTF8,POSIXPERM,SYMLINKS "$name" "
diff --git a/t/t9115-git-svn-dcommit-funky-renames.sh b/t/t9115-git-svn-dcommit-funky-renames.sh
index 9b44a44bc1..743fbe1fe4 100755
--- a/t/t9115-git-svn-dcommit-funky-renames.sh
+++ b/t/t9115-git-svn-dcommit-funky-renames.sh
@@ -93,9 +93,9 @@ test_expect_success 'git svn rebase works inside a fresh-cloned repository' '
 # > ... All of the above characters, except for the backslash, are converted
 # > to special UNICODE characters in the range 0xf000 to 0xf0ff (the
 # > "Private use area") when creating or accessing files.
-prepare_a_utf8_locale
+prepare_utf8_locale
 test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new file on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	neq=$(printf "\201\202") &&
 	git config svn.pathnameencoding cp932 &&
@@ -107,7 +107,7 @@ test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new
 
 # See the comment on the above test for setting of LC_ALL.
 test_expect_success !MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 rename on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	inf=$(printf "\201\207") &&
 	git config svn.pathnameencoding cp932 &&
diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh
index 2c213ae654..01e1e8a8f7 100755
--- a/t/t9129-git-svn-i18n-commitencoding.sh
+++ b/t/t9129-git-svn-i18n-commitencoding.sh
@@ -14,12 +14,12 @@ compare_git_head_with () {
 	test_cmp current "$1"
 }
 
-prepare_a_utf8_locale
+prepare_utf8_locale
 
 compare_svn_head_with () {
 	# extract just the log message and strip out committer info.
 	# don't use --limit here since svn 1.1.x doesn't have it,
-	LC_ALL="$a_utf8_locale" svn log $(git svn info --url) | perl -w -e '
+	LC_ALL="$GIT_TEST_UTF8_LOCALE" svn log $(git svn info --url) | perl -w -e '
 		use bytes;
 		$/ = ("-"x72) . "\n";
 		my @x = <STDIN>;
-- 
2.32.0.rc3.5.gf3d78db977


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
  2021-06-07  0:48 ` [PATCH v3] t: use pre-defined utf-8 locale for testing svn Đoàn Trần Công Danh
@ 2021-06-07  1:01   ` Junio C Hamano
  2021-06-07 14:38     ` Torsten Bögershausen
  2021-06-08  6:35     ` Jeff King
  0 siblings, 2 replies; 19+ messages in thread
From: Junio C Hamano @ 2021-06-07  1:01 UTC (permalink / raw)
  To: Đoàn Trần Công Danh
  Cc: git, Taylor Blau, Jeff King, Bagas Sanjaya, Torsten Bögershausen

Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> Subject: Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn

That certainly is better than "user-specific", but a lot worse than
"user-specified".  "pre-defined" leaves it open to question "who
defines it?", and it is a reasonable interpretation that the locale
may be hardcoded in the makefile, but that is not what this patch
does.  Saying "user-specified" would not have such a problem.

>> I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
>> that people don't even have to set this extra Makefile knob. But I'm not
>> sure if we have a good way of testing if that locale works (if we can't
>> find the "locale" binary).
>
> I also think we should fallback to "C.UTF-8" instead of not testing those
> tests.  However, I don't know if there're any systems that not have "C.UTF-8"
> locale.

I do share the feeling, but have a hunch that systems lacking
"locale -a" may be either a superset of, or have at least a large
overlap with, those lacking "C.UTF-8", and the new mechanism introduced
here will cover both of them, so I think it is OK to stop here, at
least for now.

Thanks.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v4] t: use user-specified utf-8 locale for testing svn
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
                   ` (4 preceding siblings ...)
  2021-06-07  0:48 ` [PATCH v3] t: use pre-defined utf-8 locale for testing svn Đoàn Trần Công Danh
@ 2021-06-07  1:08 ` Đoàn Trần Công Danh
  2021-06-08  6:38   ` Jeff King
  2021-06-08  6:56 ` [PATCH v5] " Đoàn Trần Công Danh
  6 siblings, 1 reply; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-07  1:08 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Taylor Blau, Jeff King,
	Bagas Sanjaya, Torsten Bögershausen, Junio C Hamano

In some test-cases, UTF-8 locale is required. To find such locale,
we're using the first available UTF-8 locale that returned by
"locale -a".

However, the locale(1) utility is unavailable on some systems,
e.g. Linux with musl libc.

However, without "locale -a", we can't guess provided UTF-8 locale.

Add a Makefile knob GIT_TEST_UTF8_LOCALE and activate it for
linux-musl in our CI system.

Rename t/lib-git-svn.sh:prepare_a_utf8_locale to prepare_utf8_locale,
since we no longer prepare the variable named "a_utf8_locale",
but set up a fallback value for GIT_TEST_UTF8_LOCALE instead.
The fallback will be LC_ALL, LANG environment variable,
or the first UTF-8 locale from output of "locale -a", in that order.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---

Junio C. Hamano wrote:

> That certainly is better than "user-specific", but a lot worse than
> "user-specified".  "pre-defined" leaves it open to question "who
> defines it?", and it is a reasonable interpretation that the locale
> may be hardcoded in the makefile, but that is not what this patch
> does.  Saying "user-specified" would not have such a problem.

Ahh, I scanned your comment too fast. Sorry for the noise.

Thanks for the quick turn-around.

Range-diff against v3:
1:  064cfc0fe3 ! 1:  6036f670d9 t: use pre-defined utf-8 locale for testing svn
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    t: use pre-defined utf-8 locale for testing svn
    +    t: use user-specified utf-8 locale for testing svn
     
         In some test-cases, UTF-8 locale is required. To find such locale,
         we're using the first available UTF-8 locale that returned by

 Makefile                                 |  7 +++++++
 ci/lib.sh                                |  1 +
 t/lib-git-svn.sh                         | 24 ++++++++++++++++++------
 t/t9100-git-svn-basic.sh                 | 14 +++-----------
 t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
 t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
 6 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/Makefile b/Makefile
index c3565fc0f8..502e0c9a81 100644
--- a/Makefile
+++ b/Makefile
@@ -398,6 +398,10 @@ all::
 # with a different indexfile format version.  If it isn't set the index
 # file format used is index-v[23].
 #
+# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
+# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
+# locale returned by "locale -a".
+#
 # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
 #
 # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
@@ -2801,6 +2805,9 @@ ifdef GIT_TEST_CMP
 endif
 ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
 	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
+endif
+ifdef GIT_TEST_UTF8_LOCALE
+	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
 endif
 	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
 ifdef GIT_PERF_REPEAT_COUNT
diff --git a/ci/lib.sh b/ci/lib.sh
index d848c036c5..476c3f369f 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -229,6 +229,7 @@ linux-musl)
 	CC=gcc
 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
+	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
 	;;
 esac
 
diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
index 547eb3c31a..83efc17661 100644
--- a/t/lib-git-svn.sh
+++ b/t/lib-git-svn.sh
@@ -121,12 +121,24 @@ start_svnserve () {
 		 --listen-host 127.0.0.1 &
 }
 
-prepare_a_utf8_locale () {
-	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
-	p
-	q
-}')
-	if test -n "$a_utf8_locale"
+prepare_utf8_locale () {
+	if test -n "$GIT_TEST_UTF8_LOCALE"
+	then
+		: test_set_prereq UTF8
+	elif test -n "${LC_ALL:-$LANG}"
+	then
+		case "${LC_ALL:-$LANG}" in
+		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
+			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
+			;;
+		esac
+	else
+		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
+		p
+		q
+	}')
+	fi
+	if test -n "$GIT_TEST_UTF8_LOCALE"
 	then
 		test_set_prereq UTF8
 	else
diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
index 1d3fdcc997..d5563ec35f 100755
--- a/t/t9100-git-svn-basic.sh
+++ b/t/t9100-git-svn-basic.sh
@@ -4,21 +4,13 @@
 #
 
 test_description='git svn basic tests'
-GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
 
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
 . ./lib-git-svn.sh
 
-case "$GIT_SVN_LC_ALL" in
-*.UTF-8)
-	test_set_prereq UTF8
-	;;
-*)
-	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
-	;;
-esac
+prepare_utf8_locale
 
 test_expect_success 'git svn --version works anywhere' '
 	nongit git svn --version
@@ -187,8 +179,8 @@ test_expect_success POSIXPERM,SYMLINKS "$name" '
 	test ! -h "$SVN_TREE"/exec-2.sh &&
 	test_cmp help "$SVN_TREE"/exec-2.sh'
 
-name="commit with UTF-8 message: locale: $GIT_SVN_LC_ALL"
-LC_ALL="$GIT_SVN_LC_ALL"
+name="commit with UTF-8 message: locale: $GIT_TEST_UTF8_LOCALE"
+LC_ALL="$GIT_TEST_UTF8_LOCALE"
 export LC_ALL
 # This test relies on the previous test, hence requires POSIXPERM,SYMLINKS
 test_expect_success UTF8,POSIXPERM,SYMLINKS "$name" "
diff --git a/t/t9115-git-svn-dcommit-funky-renames.sh b/t/t9115-git-svn-dcommit-funky-renames.sh
index 9b44a44bc1..743fbe1fe4 100755
--- a/t/t9115-git-svn-dcommit-funky-renames.sh
+++ b/t/t9115-git-svn-dcommit-funky-renames.sh
@@ -93,9 +93,9 @@ test_expect_success 'git svn rebase works inside a fresh-cloned repository' '
 # > ... All of the above characters, except for the backslash, are converted
 # > to special UNICODE characters in the range 0xf000 to 0xf0ff (the
 # > "Private use area") when creating or accessing files.
-prepare_a_utf8_locale
+prepare_utf8_locale
 test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new file on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	neq=$(printf "\201\202") &&
 	git config svn.pathnameencoding cp932 &&
@@ -107,7 +107,7 @@ test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new
 
 # See the comment on the above test for setting of LC_ALL.
 test_expect_success !MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 rename on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	inf=$(printf "\201\207") &&
 	git config svn.pathnameencoding cp932 &&
diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh
index 2c213ae654..01e1e8a8f7 100755
--- a/t/t9129-git-svn-i18n-commitencoding.sh
+++ b/t/t9129-git-svn-i18n-commitencoding.sh
@@ -14,12 +14,12 @@ compare_git_head_with () {
 	test_cmp current "$1"
 }
 
-prepare_a_utf8_locale
+prepare_utf8_locale
 
 compare_svn_head_with () {
 	# extract just the log message and strip out committer info.
 	# don't use --limit here since svn 1.1.x doesn't have it,
-	LC_ALL="$a_utf8_locale" svn log $(git svn info --url) | perl -w -e '
+	LC_ALL="$GIT_TEST_UTF8_LOCALE" svn log $(git svn info --url) | perl -w -e '
 		use bytes;
 		$/ = ("-"x72) . "\n";
 		my @x = <STDIN>;
-- 
2.32.0.rc3.5.gf3d78db977


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
  2021-06-07  1:01   ` Junio C Hamano
@ 2021-06-07 14:38     ` Torsten Bögershausen
  2021-06-07 15:42       ` Đoàn Trần Công Danh
  2021-06-08  6:35     ` Jeff King
  1 sibling, 1 reply; 19+ messages in thread
From: Torsten Bögershausen @ 2021-06-07 14:38 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Đoàn Trần Công Danh, git, Taylor Blau,
	Jeff King, Bagas Sanjaya

On Mon, Jun 07, 2021 at 10:01:12AM +0900, Junio C Hamano wrote:
> Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:
>
> > Subject: Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
>
> That certainly is better than "user-specific", but a lot worse than
> "user-specified".  "pre-defined" leaves it open to question "who
> defines it?", and it is a reasonable interpretation that the locale
> may be hardcoded in the makefile, but that is not what this patch
> does.  Saying "user-specified" would not have such a problem.
>
> >> I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
> >> that people don't even have to set this extra Makefile knob. But I'm not
> >> sure if we have a good way of testing if that locale works (if we can't
> >> find the "locale" binary).
> >
> > I also think we should fallback to "C.UTF-8" instead of not testing those
> > tests.  However, I don't know if there're any systems that not have "C.UTF-8"
> > locale.
>
> I do share the feeling, but have a hunch that systems lacking
> "locale -a" may be either superset of, or has at least large overlap
> with, those lacking "C.UTF-8", and the new mechanism introduced here
> will cover both of them, so I think it is OK to stop here, at least
> for now.
>
> Thanks.

I did a little bit of digging, the first (?) usage of locale was probably
introduced in the i18n patch to use gettext and msgfmt and stuff:

commit 5e9637c629702e3d41ad01d95956d1835d7338e0
Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Date:   Fri Nov 18 00:14:42 2011 +0100

    i18n: add infrastructure for translating Git with gettext

--------------------------

I can't shake the feeling that most systems have a UTF-8
locale enabled by default these days.

What does the following give us under Linux musl ?
set | grep UTF


MacOs (my box here) has this:

LANG=en_US.UTF-8
LC_ALL=en_US.UTF-8
LC_CTYPE=UTF-8
XTERM_LOCALE=en_US.UTF-8

If, and only if, Linux musl has a similar setup,
then we may be able to skip the Makefile knob for the moment.
Just skip locale -a, if locale is not available.
This is just a loose idea, I need to install the git-svn bindings and
test if the git-svn tests pass.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
  2021-06-07 14:38     ` Torsten Bögershausen
@ 2021-06-07 15:42       ` Đoàn Trần Công Danh
  0 siblings, 0 replies; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-07 15:42 UTC (permalink / raw)
  To: Torsten Bögershausen
  Cc: Junio C Hamano, git, Taylor Blau, Jeff King, Bagas Sanjaya

On 2021-06-07 16:38:16+0200, Torsten Bögershausen <tboegi@web.de> wrote:
> On Mon, Jun 07, 2021 at 10:01:12AM +0900, Junio C Hamano wrote:
> > Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:
> >
> > > Subject: Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
> >
> > That certainly is better than "user-specific", but a lot worse than
> > "user-specified".  "pre-defined" leaves it open to question "who
> > defines it?", and it is a reasonable interpretation that the locale
> > may be hardcoded in the makefile, but that is not what this patch
> > does.  Saying "user-specified" would not have such a problem.
> >
> > >> I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
> > >> that people don't even have to set this extra Makefile knob. But I'm not
> > >> sure if we have a good way of testing if that locale works (if we can't
> > >> find the "locale" binary).
> > >
> > > I also think we should fallback to "C.UTF-8" instead of not testing those
> > > tests.  However, I don't know if there're any systems that not have "C.UTF-8"
> > > locale.
> >
> > I do share the feeling, but have a hunch that systems lacking
> > "locale -a" may be either superset of, or has at least large overlap
> > with, those lacking "C.UTF-8", and the new mechanism introduced here
> > will cover both of them, so I think it is OK to stop here, at least
> > for now.
> >
> > Thanks.
> 
> I did a little bit of digging, the first (?) usage of locale was probably
> introduced in the i18n patch to use gettext and msgfmt and stuff:
> 
> commit 5e9637c629702e3d41ad01d95956d1835d7338e0
> Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> Date:   Fri Nov 18 00:14:42 2011 +0100
> 
>     i18n: add infrastructure for translating Git with gettext
> 
> --------------------------
> 
> I can't get rid of the feeling, that most systems have an UTF-8
> locale enabled by default these days.
> 
> What does the following give us under Linux musl ?
> set | grep UTF

It's empty.

OK, I lied.  Distributions include a file in /etc/profile.d to enable
LC_ALL/LANG.  Without that file, it's empty.  However, such use-case
is covered by our fallback to LC_ALL and LANG.

Technically, Linux with musl always knows these locales: C, POSIX,
and C.UTF-8.

I worried more about other systems instead.

> MacOs (my box here) has this:
> 
> LANG=en_US.UTF-8
> LC_ALL=en_US.UTF-8
> LC_CTYPE=UTF-8
> XTERM_LOCALE=en_US.UTF-8
> 
> If, and only if, Linux musl has a similar setup,
> then we may be able to skip the Makefile knob for the moment.

Yes, this patch allows us to skip the Makefile knob.  Because LC_ALL and
LANG will be the first and second fallback, respectively.  And we can
skip a (potentially expensive) call to "locale -a".

> Just skip locale -a, if locale is not available.
> This is just a loose idea, I need to install the git-svn bindings and
> test if the git-svn tests pass.

-- 
Danh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
  2021-06-07  1:01   ` Junio C Hamano
  2021-06-07 14:38     ` Torsten Bögershausen
@ 2021-06-08  6:35     ` Jeff King
  2021-06-08  6:45       ` Đoàn Trần Công Danh
  1 sibling, 1 reply; 19+ messages in thread
From: Jeff King @ 2021-06-08  6:35 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Đoàn Trần Công Danh, git, Taylor Blau,
	Bagas Sanjaya, Torsten Bögershausen

On Mon, Jun 07, 2021 at 10:01:12AM +0900, Junio C Hamano wrote:

> >> I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
> >> that people don't even have to set this extra Makefile knob. But I'm not
> >> sure if we have a good way of testing if that locale works (if we can't
> >> find the "locale" binary).
> >
> > I also think we should fallback to "C.UTF-8" instead of not testing those
> > tests.  However, I don't know if there're any systems that not have "C.UTF-8"
> > locale.
> 
> I do share the feeling, but have a hunch that systems lacking
> "locale -a" may be either superset of, or has at least large overlap
> with, those lacking "C.UTF-8", and the new mechanism introduced here
> will cover both of them, so I think it is OK to stop here, at least
> for now.

Yeah, I think it is OK to stop here, too. I'd worry that we'd make life
unnecessarily complicated for people testing on platforms that lack
"locale" and C.UTF-8, for little gain.

The v4 patch looks good to me.

-Peff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4] t: use user-specified utf-8 locale for testing svn
  2021-06-07  1:08 ` [PATCH v4] t: use user-specified " Đoàn Trần Công Danh
@ 2021-06-08  6:38   ` Jeff King
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff King @ 2021-06-08  6:38 UTC (permalink / raw)
  To: Đoàn Trần Công Danh
  Cc: git, Taylor Blau, Bagas Sanjaya, Torsten Bögershausen,
	Junio C Hamano

On Mon, Jun 07, 2021 at 08:08:44AM +0700, Đoàn Trần Công Danh wrote:

> In some test-cases, UTF-8 locale is required. To find such locale,
> we're using the first available UTF-8 locale that returned by
> "locale -a".
> 
> However, the locale(1) utility is unavailable on some systems,
> e.g. Linux with musl libc.
> 
> However, without "locale -a", we can't guess provided UTF-8 locale.
> 
> Add a Makefile knob GIT_TEST_UTF8_LOCALE and activate it for
> linux-musl in our CI system.
> 
> Rename t/lib-git-svn.sh:prepare_a_utf8_locale to prepare_utf8_locale,
> since we no longer prepare the variable named "a_utf8_locale",
> but set up a fallback value for GIT_TEST_UTF8_LOCALE instead.
> The fallback will be LC_ALL, LANG environment variable,
> or the first UTF-8 locale from output of "locale -a", in that order.

This patch looks good to me. I have one tiny nit below, but I'm OK if
it's left as-is:

> +prepare_utf8_locale () {
> +	if test -n "$GIT_TEST_UTF8_LOCALE"
> +	then
> +		: test_set_prereq UTF8

This ":" is really just a comment, but since it contains code we would
like to run (but which actually gets run later), it confused me.
Something like:

  : already set by the caller, nothing to do

would have made it easier to understand.

And then the rest of the function is pretty self-explanatory.

-Peff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3] t: use pre-defined utf-8 locale for testing svn
  2021-06-08  6:35     ` Jeff King
@ 2021-06-08  6:45       ` Đoàn Trần Công Danh
  0 siblings, 0 replies; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-08  6:45 UTC (permalink / raw)
  To: Jeff King
  Cc: Junio C Hamano, git, Taylor Blau, Bagas Sanjaya,
	Torsten Bögershausen

On 2021-06-08 02:35:30-0400, Jeff King <peff@peff.net> wrote:
> On Mon, Jun 07, 2021 at 10:01:12AM +0900, Junio C Hamano wrote:
> 
> > >> I kind of wonder if trying "C.UTF-8" would be a reasonable fallback so
> > >> that people don't even have to set this extra Makefile knob. But I'm not
> > >> sure if we have a good way of testing if that locale works (if we can't
> > >> find the "locale" binary).
> > >
> > > I also think we should fallback to "C.UTF-8" instead of not testing those
> > > tests.  However, I don't know if there're any systems that not have "C.UTF-8"
> > > locale.
> > 
> > I do share the feeling, but have a hunch that systems lacking
> > "locale -a" may be either superset of, or has at least large overlap
> > with, those lacking "C.UTF-8", and the new mechanism introduced here
> > will cover both of them, so I think it is OK to stop here, at least
> > for now.
> 
> Yeah, I think it is OK to stop here, too. I'd worry that we'd make life
> unnecessarily complicated for people testing on platforms that lack
> "locale" and C.UTF-8, for little gain.

Argh, reading this and my patch again, I found a regression in my
patch: when either LC_ALL or LANG is set, but to something not ending
with utf-8, "locale -a" is ignored completely. I'll send a reroll
later. (With this reroll, we'll drop the dumb ":", too.)

> 
> The v4 patch looks good to me.
> 
> -Peff

-- 
Danh

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v5] t: use user-specified utf-8 locale for testing svn
  2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
                   ` (5 preceding siblings ...)
  2021-06-07  1:08 ` [PATCH v4] t: use user-specified " Đoàn Trần Công Danh
@ 2021-06-08  6:56 ` Đoàn Trần Công Danh
  2021-06-08  7:26   ` Jeff King
  6 siblings, 1 reply; 19+ messages in thread
From: Đoàn Trần Công Danh @ 2021-06-08  6:56 UTC (permalink / raw)
  To: git
  Cc: Đoàn Trần Công Danh, Taylor Blau, Jeff King,
	Bagas Sanjaya, Torsten Bögershausen, Junio C Hamano

In some test-cases, a UTF-8 locale is required. To find such a locale,
we're using the first available UTF-8 locale that is returned by
"locale -a".

However, the locale(1) utility is unavailable on some systems,
e.g. Linux with musl libc.

Without "locale -a", we can't guess which UTF-8 locale is provided.

Add a Makefile knob GIT_TEST_UTF8_LOCALE and activate it for
linux-musl in our CI system.

Rename t/lib-git-svn.sh:prepare_a_utf8_locale to prepare_utf8_locale,
since we no longer prepare the variable named "a_utf8_locale",
but set up a fallback value for GIT_TEST_UTF8_LOCALE instead.
The fallback will be LC_ALL, LANG environment variable,
or the first UTF-8 locale from output of "locale -a", in that order.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---

 New in v5: fix a regression where, when LC_ALL or LANG is set to something
 that does not end with utf-8 (e.g. "C"), "locale -a" was ignored completely.

 Thanks to Peff for the comment; I hadn't noticed this regression earlier.

Range-diff against v4:
1:  6036f670d9 ! 1:  6675de6866 t: use user-specified utf-8 locale for testing svn
    @@ t/lib-git-svn.sh: start_svnserve () {
     -}')
     -	if test -n "$a_utf8_locale"
     +prepare_utf8_locale () {
    -+	if test -n "$GIT_TEST_UTF8_LOCALE"
    -+	then
    -+		: test_set_prereq UTF8
    -+	elif test -n "${LC_ALL:-$LANG}"
    ++	if test -z "$GIT_TEST_UTF8_LOCALE"
     +	then
     +		case "${LC_ALL:-$LANG}" in
     +		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
     +			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
     +			;;
    ++		*)
    ++			GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
    ++				p
    ++				q
    ++			}')
    ++			;;
     +		esac
    -+	else
    -+		GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
    -+		p
    -+		q
    -+	}')
     +	fi
     +	if test -n "$GIT_TEST_UTF8_LOCALE"
      	then

 Makefile                                 |  7 +++++++
 ci/lib.sh                                |  1 +
 t/lib-git-svn.sh                         | 22 ++++++++++++++++------
 t/t9100-git-svn-basic.sh                 | 14 +++-----------
 t/t9115-git-svn-dcommit-funky-renames.sh |  6 +++---
 t/t9129-git-svn-i18n-commitencoding.sh   |  4 ++--
 6 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/Makefile b/Makefile
index c3565fc0f8..502e0c9a81 100644
--- a/Makefile
+++ b/Makefile
@@ -398,6 +398,10 @@ all::
 # with a different indexfile format version.  If it isn't set the index
 # file format used is index-v[23].
 #
+# Define GIT_TEST_UTF8_LOCALE to preferred utf-8 locale for testing.
+# If it isn't set, fallback to $LC_ALL, $LANG or use the first utf-8
+# locale returned by "locale -a".
+#
 # Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
 #
 # Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
@@ -2801,6 +2805,9 @@ ifdef GIT_TEST_CMP
 endif
 ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT
 	@echo GIT_TEST_CMP_USE_COPIED_CONTEXT=YesPlease >>$@+
+endif
+ifdef GIT_TEST_UTF8_LOCALE
+	@echo GIT_TEST_UTF8_LOCALE=\''$(subst ','\'',$(subst ','\'',$(GIT_TEST_UTF8_LOCALE)))'\' >>$@+
 endif
 	@echo NO_GETTEXT=\''$(subst ','\'',$(subst ','\'',$(NO_GETTEXT)))'\' >>$@+
 ifdef GIT_PERF_REPEAT_COUNT
diff --git a/ci/lib.sh b/ci/lib.sh
index d848c036c5..476c3f369f 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -229,6 +229,7 @@ linux-musl)
 	CC=gcc
 	MAKEFLAGS="$MAKEFLAGS PYTHON_PATH=/usr/bin/python3 USE_LIBPCRE2=Yes"
 	MAKEFLAGS="$MAKEFLAGS NO_REGEX=Yes ICONV_OMITS_BOM=Yes"
+	MAKEFLAGS="$MAKEFLAGS GIT_TEST_UTF8_LOCALE=C.UTF-8"
 	;;
 esac
 
diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh
index 547eb3c31a..2fde2353fd 100644
--- a/t/lib-git-svn.sh
+++ b/t/lib-git-svn.sh
@@ -121,12 +121,22 @@ start_svnserve () {
 		 --listen-host 127.0.0.1 &
 }
 
-prepare_a_utf8_locale () {
-	a_utf8_locale=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
-	p
-	q
-}')
-	if test -n "$a_utf8_locale"
+prepare_utf8_locale () {
+	if test -z "$GIT_TEST_UTF8_LOCALE"
+	then
+		case "${LC_ALL:-$LANG}" in
+		*.[Uu][Tt][Ff]8 | *.[Uu][Tt][Ff]-8)
+			GIT_TEST_UTF8_LOCALE="${LC_ALL:-$LANG}"
+			;;
+		*)
+			GIT_TEST_UTF8_LOCALE=$(locale -a | sed -n '/\.[uU][tT][fF]-*8$/{
+				p
+				q
+			}')
+			;;
+		esac
+	fi
+	if test -n "$GIT_TEST_UTF8_LOCALE"
 	then
 		test_set_prereq UTF8
 	else
diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh
index 1d3fdcc997..d5563ec35f 100755
--- a/t/t9100-git-svn-basic.sh
+++ b/t/t9100-git-svn-basic.sh
@@ -4,21 +4,13 @@
 #
 
 test_description='git svn basic tests'
-GIT_SVN_LC_ALL=${LC_ALL:-$LANG}
 
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
 . ./lib-git-svn.sh
 
-case "$GIT_SVN_LC_ALL" in
-*.UTF-8)
-	test_set_prereq UTF8
-	;;
-*)
-	say "# UTF-8 locale not set, some tests skipped ($GIT_SVN_LC_ALL)"
-	;;
-esac
+prepare_utf8_locale
 
 test_expect_success 'git svn --version works anywhere' '
 	nongit git svn --version
@@ -187,8 +179,8 @@ test_expect_success POSIXPERM,SYMLINKS "$name" '
 	test ! -h "$SVN_TREE"/exec-2.sh &&
 	test_cmp help "$SVN_TREE"/exec-2.sh'
 
-name="commit with UTF-8 message: locale: $GIT_SVN_LC_ALL"
-LC_ALL="$GIT_SVN_LC_ALL"
+name="commit with UTF-8 message: locale: $GIT_TEST_UTF8_LOCALE"
+LC_ALL="$GIT_TEST_UTF8_LOCALE"
 export LC_ALL
 # This test relies on the previous test, hence requires POSIXPERM,SYMLINKS
 test_expect_success UTF8,POSIXPERM,SYMLINKS "$name" "
diff --git a/t/t9115-git-svn-dcommit-funky-renames.sh b/t/t9115-git-svn-dcommit-funky-renames.sh
index 9b44a44bc1..743fbe1fe4 100755
--- a/t/t9115-git-svn-dcommit-funky-renames.sh
+++ b/t/t9115-git-svn-dcommit-funky-renames.sh
@@ -93,9 +93,9 @@ test_expect_success 'git svn rebase works inside a fresh-cloned repository' '
 # > ... All of the above characters, except for the backslash, are converted
 # > to special UNICODE characters in the range 0xf000 to 0xf0ff (the
 # > "Private use area") when creating or accessing files.
-prepare_a_utf8_locale
+prepare_utf8_locale
 test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new file on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	neq=$(printf "\201\202") &&
 	git config svn.pathnameencoding cp932 &&
@@ -107,7 +107,7 @@ test_expect_success UTF8,!MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 new
 
 # See the comment on the above test for setting of LC_ALL.
 test_expect_success !MINGW,!UTF8_NFD_TO_NFC 'svn.pathnameencoding=cp932 rename on dcommit' '
-	LC_ALL=$a_utf8_locale &&
+	LC_ALL=$GIT_TEST_UTF8_LOCALE &&
 	export LC_ALL &&
 	inf=$(printf "\201\207") &&
 	git config svn.pathnameencoding cp932 &&
diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh
index 2c213ae654..01e1e8a8f7 100755
--- a/t/t9129-git-svn-i18n-commitencoding.sh
+++ b/t/t9129-git-svn-i18n-commitencoding.sh
@@ -14,12 +14,12 @@ compare_git_head_with () {
 	test_cmp current "$1"
 }
 
-prepare_a_utf8_locale
+prepare_utf8_locale
 
 compare_svn_head_with () {
 	# extract just the log message and strip out committer info.
 	# don't use --limit here since svn 1.1.x doesn't have it,
-	LC_ALL="$a_utf8_locale" svn log $(git svn info --url) | perl -w -e '
+	LC_ALL="$GIT_TEST_UTF8_LOCALE" svn log $(git svn info --url) | perl -w -e '
 		use bytes;
 		$/ = ("-"x72) . "\n";
 		my @x = <STDIN>;
-- 
2.32.0.278.gd42b80f139


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5] t: use user-specified utf-8 locale for testing svn
  2021-06-08  6:56 ` [PATCH v5] " Đoàn Trần Công Danh
@ 2021-06-08  7:26   ` Jeff King
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff King @ 2021-06-08  7:26 UTC (permalink / raw)
  To: Đoàn Trần Công Danh
  Cc: git, Taylor Blau, Bagas Sanjaya, Torsten Bögershausen,
	Junio C Hamano

On Tue, Jun 08, 2021 at 01:56:28PM +0700, Đoàn Trần Công Danh wrote:

>  New in v5: fix a regression when LC_ALL or LANG set to something that not
>  ends with utf-8, e.g. "C", "locale -a" will be ignored completely.
> 
>  Thanks to Peff's comment, I didn't notice this regression earlier.

Doh, I totally missed that, too. I'm glad my comment was helpful in that
respect, at least. :)

Your fix here looks good.

-Peff

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] t: use user-specific utf-8 locale for testing
  2021-06-02 19:56 ` Taylor Blau
@ 2021-06-08 10:49   ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 19+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2021-06-08 10:49 UTC (permalink / raw)
  To: Taylor Blau; +Cc: Đoàn Trần Công Danh, git


On Wed, Jun 02 2021, Taylor Blau wrote:

> On Wed, Jun 02, 2021 at 06:46:46PM +0700, Đoàn Trần Công Danh wrote:
>> Despite being required by POSIX, locale(1) is unavailable in some
>> systems, e.g. Linux with musl libc.  Some of those systems support
>> utf-8 locale out of the box.
>
> Hmmph. I would have imagined that locale was available everywhere, but
> unfortunately not.

Small and unsolicited history lesson from a person with funny characters
in their name & language :)

Today it seems like *nix systems have always had UTF-8, but this was a
relatively late development.  

It's Plan9 that had UTF-8 from the start, on *nix systems it was
US-ASCII, and anything else was tacked on top later on.

When I started using *nix systems I believe it was quite common to have
default configurations with only ISO-8859-1 locales installed, and
certainly that's what a lot of or most users who had the need for
locales in European languages not covered by US-ASCII used by default.

This is from hazy memory, but I think it was even actively recommended
against having or using UTF-8 locales on the system. If you
e.g. connected to an IRC channel, or copy/pasted from your text editor
into an E-Mail you could easily send the other end misencoded gibberish.

Later on things like IRC channels in these languages had a "switch day",
it was a complete mess. Nowadays mostly nobody really notices or
remembers anymore these encoding issues since we've mostly got UTF-8
everywhere as a result.

I mean, at least in the case of European languages, I understand
e.g. Japanese and Chinese still have their own persistent encoding
issues related to competing standards.

Even today you can't rely on UTF-8 even on Linux systems, and I think
this has become even more true of late with minimal CI systems or other
chroot-like test environments.

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2021-06-10 10:23 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-02 11:46 [PATCH] t: use user-specific utf-8 locale for testing Đoàn Trần Công Danh
2021-06-02 19:56 ` Taylor Blau
2021-06-08 10:49   ` Ævar Arnfjörð Bjarmason
2021-06-03 19:27 ` Jeff King
2021-06-04  3:32 ` Bagas Sanjaya
2021-06-04  5:20   ` Đoàn Trần Công Danh
2021-06-06 16:33 ` [PATCH v2] " Đoàn Trần Công Danh
2021-06-06 20:06   ` Torsten Bögershausen
2021-06-07  0:20     ` Junio C Hamano
2021-06-07  0:48 ` [PATCH v3] t: use pre-defined utf-8 locale for testing svn Đoàn Trần Công Danh
2021-06-07  1:01   ` Junio C Hamano
2021-06-07 14:38     ` Torsten Bögershausen
2021-06-07 15:42       ` Đoàn Trần Công Danh
2021-06-08  6:35     ` Jeff King
2021-06-08  6:45       ` Đoàn Trần Công Danh
2021-06-07  1:08 ` [PATCH v4] t: use user-specified " Đoàn Trần Công Danh
2021-06-08  6:38   ` Jeff King
2021-06-08  6:56 ` [PATCH v5] " Đoàn Trần Công Danh
2021-06-08  7:26   ` Jeff King

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.version-control.git
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.version-control.git
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git