git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [RFC PATCH] ref-filter: sort detached HEAD lines firstly
@ 2019-06-06 21:38 Matthew DeVore
  2019-06-09  8:17 ` [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy Johannes Schindelin
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Matthew DeVore @ 2019-06-06 21:38 UTC (permalink / raw)
  To: git
  Cc: avarab, Johannes.Schindelin, git, olyatelezhnaya, samuel.maftoul,
	gitster, karthik.188, pclouds, sunshine, emilyshaffer, jrn,
	matvore

Before this patch, "git branch" would put "(HEAD detached...)" and "(no
branch, rebasing...)" lines before all the other branches *in most
cases* and only because of the fact that "(" is a low codepoint. This
would not hold in the Chinese locale, which uses a full-width "(" symbol
(codepoint FF08). This meant that the detached HEAD line would appear
after all local refs and even after the remote refs if there were any.

Deliberately sort the detached HEAD refs before other refs when sorting
by refname rather than rely on codepoint subtleties.

Signed-off-by: Matthew DeVore <matvore@google.com>
---
 ref-filter.c           | 10 +++++++---
 t/lib-gettext.sh       | 16 +++++++++++++---
 t/t3207-branch-intl.sh | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 6 deletions(-)
 create mode 100755 t/t3207-branch-intl.sh

diff --git a/ref-filter.c b/ref-filter.c
index 8500671bc6..cbfae790f9 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2157,25 +2157,29 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	cmp_type cmp_type = used_atom[s->atom].type;
 	int (*cmp_fn)(const char *, const char *);
 	struct strbuf err = STRBUF_INIT;
 
 	if (get_ref_atom_value(a, s->atom, &va, &err))
 		die("%s", err.buf);
 	if (get_ref_atom_value(b, s->atom, &vb, &err))
 		die("%s", err.buf);
 	strbuf_release(&err);
 	cmp_fn = s->ignore_case ? strcasecmp : strcmp;
-	if (s->version)
+	if (s->version) {
 		cmp = versioncmp(va->s, vb->s);
-	else if (cmp_type == FIELD_STR)
+	} else if (cmp_type == FIELD_STR) {
+		if ((a->kind & FILTER_REFS_DETACHED_HEAD) !=
+				(b->kind & FILTER_REFS_DETACHED_HEAD)) {
+			return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;
+		}
 		cmp = cmp_fn(va->s, vb->s);
-	else {
+	} else {
 		if (va->value < vb->value)
 			cmp = -1;
 		else if (va->value == vb->value)
 			cmp = cmp_fn(a->refname, b->refname);
 		else
 			cmp = 1;
 	}
 
 	return (s->reverse) ? -cmp : cmp;
 }
diff --git a/t/lib-gettext.sh b/t/lib-gettext.sh
index 2139b427ca..de08d109dc 100644
--- a/t/lib-gettext.sh
+++ b/t/lib-gettext.sh
@@ -25,23 +25,29 @@ then
 		p
 		q
 	}')
 	# is_IS.ISO8859-1 on Solaris and FreeBSD, is_IS.iso88591 on Debian
 	is_IS_iso_locale=$(locale -a 2>/dev/null |
 		sed -n '/^is_IS\.[iI][sS][oO]8859-*1$/{
 		p
 		q
 	}')
 
-	# Export them as an environment variable so the t0202/test.pl Perl
-	# test can use it too
-	export is_IS_locale is_IS_iso_locale
+	zh_CN_locale=$(locale -a 2>/dev/null |
+		sed -n '/^zh_CN\.[uU][tT][fF]-*8$/{
+		p
+		q
+	}')
+
+	# Export them as environment variables so other tests can use them
+	# too
+	export is_IS_locale is_IS_iso_locale zh_CN_locale
 
 	if test -n "$is_IS_locale" &&
 		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
 	then
 		# Some of the tests need the reference Icelandic locale
 		test_set_prereq GETTEXT_LOCALE
 
 		# Exporting for t0202/test.pl
 		GETTEXT_LOCALE=1
 		export GETTEXT_LOCALE
@@ -53,11 +59,15 @@ then
 	if test -n "$is_IS_iso_locale" &&
 		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
 	then
 		# Some of the tests need the reference Icelandic locale
 		test_set_prereq GETTEXT_ISO_LOCALE
 
 		say "# lib-gettext: Found '$is_IS_iso_locale' as an is_IS ISO-8859-1 locale"
 	else
 		say "# lib-gettext: No is_IS ISO-8859-1 locale available"
 	fi
+
+	if test -z "$zh_CN_locale"; then
+		say "# lib-gettext: No zh_CN UTF-8 locale available"
+	fi
 fi
diff --git a/t/t3207-branch-intl.sh b/t/t3207-branch-intl.sh
new file mode 100755
index 0000000000..9f6fcc7481
--- /dev/null
+++ b/t/t3207-branch-intl.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+test_description='git branch internationalization tests'
+
+. ./lib-gettext.sh
+
+test_expect_success 'init repo' '
+	git init r1 &&
+	touch r1/foo &&
+	git -C r1 add foo &&
+	git -C r1 commit -m foo
+'
+
+test_expect_success 'detached head sorts before other branches' '
+	# Ref sorting logic should put detached heads before the other
+	# branches, but this is not automatic when a branch name sorts
+	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
+	# The latter case is nearly guaranteed for the Chinese locale.
+
+	git -C r1 checkout HEAD^{} -- &&
+	git -C r1 branch !should_be_after_detached HEAD &&
+	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
+		git -C r1 branch >actual &&
+	git -C r1 checkout - &&
+
+	awk "
+	# We need full-width or half-width parens on the first line.
+	NR == 1 && (/[(].*[)]/ || /\xef\xbc\x88.*\xef\xbc\x89/) {
+		found_head = 1;
+	}
+	/!should_be_after_detached/ {
+		found_control_branch = 1;
+	}
+	END { exit !found_head || !found_control_branch }
+	" actual
+'
+
+test_done
-- 
2.21.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy
  2019-06-06 21:38 [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
@ 2019-06-09  8:17 ` Johannes Schindelin
  2019-06-09 16:39   ` Johannes Schindelin
  2019-06-10 23:49   ` [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
  2019-06-11 18:28 ` [PATCH v2 0/1] Sort " Matthew DeVore
  2019-06-18 22:29 ` [PATCH v3 0/1] Sort detached heads line firstly Matthew DeVore
  2 siblings, 2 replies; 18+ messages in thread
From: Johannes Schindelin @ 2019-06-09  8:17 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, avarab, git, olyatelezhnaya, samuel.maftoul, gitster,
	karthik.188, pclouds, sunshine, emilyshaffer, jrn

Hi Matthew,

On Thu, 6 Jun 2019, Matthew DeVore wrote:

> Before this patch, "git branch" would put "(HEAD detached...)" and "(no
> branch, rebasing...)" lines before all the other branches *in most
> cases* and only because of the fact that "(" is a low codepoint. This
> would not hold in the Chinese locale, which uses a full-width "(" symbol
> (codepoint FF08). This meant that the detached HEAD line would appear
> after all local refs and even after the remote refs if there were any.
>
> Deliberately sort the detached HEAD refs before other refs when sorting
> by refname rather than rely on codepoint subtleties.

This description is pretty convincing!

> diff --git a/ref-filter.c b/ref-filter.c
> index 8500671bc6..cbfae790f9 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -2157,25 +2157,29 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
>  	cmp_type cmp_type = used_atom[s->atom].type;
>  	int (*cmp_fn)(const char *, const char *);
>  	struct strbuf err = STRBUF_INIT;
>
>  	if (get_ref_atom_value(a, s->atom, &va, &err))
>  		die("%s", err.buf);
>  	if (get_ref_atom_value(b, s->atom, &vb, &err))
>  		die("%s", err.buf);
>  	strbuf_release(&err);
>  	cmp_fn = s->ignore_case ? strcasecmp : strcmp;
> -	if (s->version)
> +	if (s->version) {
>  		cmp = versioncmp(va->s, vb->s);
> -	else if (cmp_type == FIELD_STR)
> +	} else if (cmp_type == FIELD_STR) {

I find that it makes sense in general to suppress one's urges regarding
introducing `{ ... }` around one-liners when the patch does not actually
require it.

For example, I found this patch harder than necessary to read because of
it.

> +		if ((a->kind & FILTER_REFS_DETACHED_HEAD) !=
> +				(b->kind & FILTER_REFS_DETACHED_HEAD)) {

So in case that both are detached...

> +			return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;
> +		}
>  		cmp = cmp_fn(va->s, vb->s);

... we compare their commit hashes, is that right? Might be worth a code
comment.

> -	else {
> +	} else {

FWIW it would have been a much more obvious patch if it had done

 	if (s->version)
		[...]
+	else if (cmp_type == FIELD_STR &&
+		 (a->kind & FILTER_REFS_DETACHED_HEAD ||
+		  b->kind & FILTER_REFS_DETACHED_HEAD))
+		return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;
 	else if (cmp_type == FIELD_STR)
		[...]

Maybe still worth doing.

FWIW I was *so* tempted to write

	((a->kind ^ b->kind) & FILTER_REFS_DETACHED_HEAD)

to make this code DRYer, but then, readers not intimately familiar with
Boolean arithmetic might not even know about the `^` operator, making the
code harder to read than necessary, too.

> diff --git a/t/lib-gettext.sh b/t/lib-gettext.sh
> index 2139b427ca..de08d109dc 100644
> --- a/t/lib-gettext.sh
> +++ b/t/lib-gettext.sh
> @@ -25,23 +25,29 @@ then
>  		p
>  		q
>  	}')
>  	# is_IS.ISO8859-1 on Solaris and FreeBSD, is_IS.iso88591 on Debian
>  	is_IS_iso_locale=$(locale -a 2>/dev/null |
>  		sed -n '/^is_IS\.[iI][sS][oO]8859-*1$/{
>  		p
>  		q
>  	}')
>
> -	# Export them as an environment variable so the t0202/test.pl Perl
> -	# test can use it too
> -	export is_IS_locale is_IS_iso_locale
> +	zh_CN_locale=$(locale -a 2>/dev/null |
> +		sed -n '/^zh_CN\.[uU][tT][fF]-*8$/{
> +		p
> +		q
> +	}')
> +
> +	# Export them as environment variables so other tests can use them
> +	# too
> +	export is_IS_locale is_IS_iso_locale zh_CN_locale
>
>  	if test -n "$is_IS_locale" &&
>  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
>  	then
>  		# Some of the tests need the reference Icelandic locale
>  		test_set_prereq GETTEXT_LOCALE
>
>  		# Exporting for t0202/test.pl
>  		GETTEXT_LOCALE=1
>  		export GETTEXT_LOCALE
> @@ -53,11 +59,15 @@ then
>  	if test -n "$is_IS_iso_locale" &&
>  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
>  	then
>  		# Some of the tests need the reference Icelandic locale
>  		test_set_prereq GETTEXT_ISO_LOCALE
>
>  		say "# lib-gettext: Found '$is_IS_iso_locale' as an is_IS ISO-8859-1 locale"
>  	else
>  		say "# lib-gettext: No is_IS ISO-8859-1 locale available"
>  	fi
> +
> +	if test -z "$zh_CN_locale"; then
> +		say "# lib-gettext: No zh_CN UTF-8 locale available"
> +	fi

I wonder why this hunk, unlike the previous one, does not imitate the
is_IS handling closely.

> diff --git a/t/t3207-branch-intl.sh b/t/t3207-branch-intl.sh
> new file mode 100755
> index 0000000000..9f6fcc7481
> --- /dev/null
> +++ b/t/t3207-branch-intl.sh
> @@ -0,0 +1,38 @@
> +#!/bin/sh
> +
> +test_description='git branch internationalization tests'
> +
> +. ./lib-gettext.sh
> +
> +test_expect_success 'init repo' '
> +	git init r1 &&

Why?

> +	touch r1/foo &&
> +	git -C r1 add foo &&
> +	git -C r1 commit -m foo
> +'

Why not simply `test_commit foo`?

> +test_expect_success 'detached head sorts before other branches' '
> +	# Ref sorting logic should put detached heads before the other
> +	# branches, but this is not automatic when a branch name sorts
> +	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
> +	# The latter case is nearly guaranteed for the Chinese locale.
> +
> +	git -C r1 checkout HEAD^{} -- &&
> +	git -C r1 branch !should_be_after_detached HEAD &&

I am not sure that `!` is a wise choice, as it might not be a legal file
name character everywhere. A `.` or `-` might make more sense.

> +	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
> +		git -C r1 branch >actual &&
> +	git -C r1 checkout - &&

Why call `checkout` after `branch`? That's unnecessary, we do not verify
anything after that call.

> +	awk "
> +	# We need full-width or half-width parens on the first line.
> +	NR == 1 && (/[(].*[)]/ || /\xef\xbc\x88.*\xef\xbc\x89/) {
> +		found_head = 1;
> +	}
> +	/!should_be_after_detached/ {
> +		found_control_branch = 1;
> +	}
> +	END { exit !found_head || !found_control_branch }
> +	" actual

This might look beautiful for a fan of `awk`. For the vast majority of us,
this is not a good idea.

Remember, you do *not* write those tests for your own pleasure, you do
*not* write those tests in order to help you catch problems while you
develop your patches, you do *not* develop these tests in order to just
catch future breakages.

You *do* write those tests for *other* developers who you try to help in
preventing introducing regressions.

As such, you *want* the tests to be

- easy to understand for as wide a range of developers as you can make,

- quick,

- covering regressions, and *only* regressions,

- helping diagnose *and* fix regressions.

In the ideal case you won't even hear when developers found your test
helpful, and you will never, ever learn about regressions that have been
prevented.

You most frequently will hear about your tests when they did not do their
job well.

In this instance, I would have expected something like

	test_expect_lines = 3 actual &&

	head -n 1 <actual >first &&
	test_i18ngrep "detached HEAD" first &&

	tail -n 1 <actual >last &&
	grep should_be_after last

instead of the "awk-ward" code above.

Ciao,
Johannes

> +'
> +
> +test_done
> --
> 2.21.0
>
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy
  2019-06-09  8:17 ` [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy Johannes Schindelin
@ 2019-06-09 16:39   ` Johannes Schindelin
  2019-06-10 23:49   ` [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
  1 sibling, 0 replies; 18+ messages in thread
From: Johannes Schindelin @ 2019-06-09 16:39 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, avarab, git, olyatelezhnaya, samuel.maftoul, gitster,
	karthik.188, pclouds, sunshine, emilyshaffer, jrn

Hi,

sorry for top-posting, but I just noticed the "firstlyxy" typo in the
subject ;-)

Ciao,
Dscho

On Sun, 9 Jun 2019, Johannes Schindelin wrote:

> Hi Matthew,
>
> On Thu, 6 Jun 2019, Matthew DeVore wrote:
>
> > Before this patch, "git branch" would put "(HEAD detached...)" and "(no
> > branch, rebasing...)" lines before all the other branches *in most
> > cases* and only because of the fact that "(" is a low codepoint. This
> > would not hold in the Chinese locale, which uses a full-width "(" symbol
> > (codepoint FF08). This meant that the detached HEAD line would appear
> > after all local refs and even after the remote refs if there were any.
> >
> > Deliberately sort the detached HEAD refs before other refs when sorting
> > by refname rather than rely on codepoint subtleties.
>
> This description is pretty convincing!
>
> > diff --git a/ref-filter.c b/ref-filter.c
> > index 8500671bc6..cbfae790f9 100644
> > --- a/ref-filter.c
> > +++ b/ref-filter.c
> > @@ -2157,25 +2157,29 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
> >  	cmp_type cmp_type = used_atom[s->atom].type;
> >  	int (*cmp_fn)(const char *, const char *);
> >  	struct strbuf err = STRBUF_INIT;
> >
> >  	if (get_ref_atom_value(a, s->atom, &va, &err))
> >  		die("%s", err.buf);
> >  	if (get_ref_atom_value(b, s->atom, &vb, &err))
> >  		die("%s", err.buf);
> >  	strbuf_release(&err);
> >  	cmp_fn = s->ignore_case ? strcasecmp : strcmp;
> > -	if (s->version)
> > +	if (s->version) {
> >  		cmp = versioncmp(va->s, vb->s);
> > -	else if (cmp_type == FIELD_STR)
> > +	} else if (cmp_type == FIELD_STR) {
>
> I find that it makes sense in general to suppress one's urges regarding
> introducing `{ ... }` around one-liners when the patch does not actually
> require it.
>
> For example, I found this patch harder than necessary to read because of
> it.
>
> > +		if ((a->kind & FILTER_REFS_DETACHED_HEAD) !=
> > +				(b->kind & FILTER_REFS_DETACHED_HEAD)) {
>
> So in case that both are detached...
>
> > +			return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;
> > +		}
> >  		cmp = cmp_fn(va->s, vb->s);
>
> ... we compare their commit hashes, is that right? Might be worth a code
> comment.
>
> > -	else {
> > +	} else {
>
> FWIW it would have been a much more obvious patch if it had done
>
>  	if (s->version)
> 		[...]
> +	else if (cmp_type == FIELD_STR &&
> +		 (a->kind & FILTER_REFS_DETACHED_HEAD ||
> +		  b->kind & FILTER_REFS_DETACHED_HEAD))
> +		return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;
>  	else if (cmp_type == FIELD_STR)
> 		[...]
>
> Maybe still worth doing.
>
> FWIW I was *so* tempted to write
>
> 	((a->kind ^ b->kind) & FILTER_REFS_DETACHED_HEAD)
>
> to make this code DRYer, but then, readers not intimately familiar with
> Boolean arithmetic might not even know about the `^` operator, making the
> code harder to read than necessary, too.
>
> > diff --git a/t/lib-gettext.sh b/t/lib-gettext.sh
> > index 2139b427ca..de08d109dc 100644
> > --- a/t/lib-gettext.sh
> > +++ b/t/lib-gettext.sh
> > @@ -25,23 +25,29 @@ then
> >  		p
> >  		q
> >  	}')
> >  	# is_IS.ISO8859-1 on Solaris and FreeBSD, is_IS.iso88591 on Debian
> >  	is_IS_iso_locale=$(locale -a 2>/dev/null |
> >  		sed -n '/^is_IS\.[iI][sS][oO]8859-*1$/{
> >  		p
> >  		q
> >  	}')
> >
> > -	# Export them as an environment variable so the t0202/test.pl Perl
> > -	# test can use it too
> > -	export is_IS_locale is_IS_iso_locale
> > +	zh_CN_locale=$(locale -a 2>/dev/null |
> > +		sed -n '/^zh_CN\.[uU][tT][fF]-*8$/{
> > +		p
> > +		q
> > +	}')
> > +
> > +	# Export them as environment variables so other tests can use them
> > +	# too
> > +	export is_IS_locale is_IS_iso_locale zh_CN_locale
> >
> >  	if test -n "$is_IS_locale" &&
> >  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
> >  	then
> >  		# Some of the tests need the reference Icelandic locale
> >  		test_set_prereq GETTEXT_LOCALE
> >
> >  		# Exporting for t0202/test.pl
> >  		GETTEXT_LOCALE=1
> >  		export GETTEXT_LOCALE
> > @@ -53,11 +59,15 @@ then
> >  	if test -n "$is_IS_iso_locale" &&
> >  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
> >  	then
> >  		# Some of the tests need the reference Icelandic locale
> >  		test_set_prereq GETTEXT_ISO_LOCALE
> >
> >  		say "# lib-gettext: Found '$is_IS_iso_locale' as an is_IS ISO-8859-1 locale"
> >  	else
> >  		say "# lib-gettext: No is_IS ISO-8859-1 locale available"
> >  	fi
> > +
> > +	if test -z "$zh_CN_locale"; then
> > +		say "# lib-gettext: No zh_CN UTF-8 locale available"
> > +	fi
>
> I wonder why this hunk, unlike the previous one, does not imitate the
> is_IS handling closely.
>
> > diff --git a/t/t3207-branch-intl.sh b/t/t3207-branch-intl.sh
> > new file mode 100755
> > index 0000000000..9f6fcc7481
> > --- /dev/null
> > +++ b/t/t3207-branch-intl.sh
> > @@ -0,0 +1,38 @@
> > +#!/bin/sh
> > +
> > +test_description='git branch internationalization tests'
> > +
> > +. ./lib-gettext.sh
> > +
> > +test_expect_success 'init repo' '
> > +	git init r1 &&
>
> Why?
>
> > +	touch r1/foo &&
> > +	git -C r1 add foo &&
> > +	git -C r1 commit -m foo
> > +'
>
> Why not simply `test_commit foo`?
>
> > +test_expect_success 'detached head sorts before other branches' '
> > +	# Ref sorting logic should put detached heads before the other
> > +	# branches, but this is not automatic when a branch name sorts
> > +	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
> > +	# The latter case is nearly guaranteed for the Chinese locale.
> > +
> > +	git -C r1 checkout HEAD^{} -- &&
> > +	git -C r1 branch !should_be_after_detached HEAD &&
>
> I am not sure that `!` is a wise choice, as it might not be a legal file
> name character everywhere. A `.` or `-` might make more sense.
>
> > +	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
> > +		git -C r1 branch >actual &&
> > +	git -C r1 checkout - &&
>
> Why call `checkout` after `branch`? That's unnecessary, we do not verify
> anything after that call.
>
> > +	awk "
> > +	# We need full-width or half-width parens on the first line.
> > +	NR == 1 && (/[(].*[)]/ || /\xef\xbc\x88.*\xef\xbc\x89/) {
> > +		found_head = 1;
> > +	}
> > +	/!should_be_after_detached/ {
> > +		found_control_branch = 1;
> > +	}
> > +	END { exit !found_head || !found_control_branch }
> > +	" actual
>
> This might look beautiful for a fan of `awk`. For the vast majority of us,
> this is not a good idea.
>
> Remember, you do *not* write those tests for your own pleasure, you do
> *not* write those tests in order to help you catch problems while you
> develop your patches, you do *not* develop these tests in order to just
> catch future breakages.
>
> You *do* write those tests for *other* developers who you try to help in
> preventing introducing regressions.
>
> As such, you *want* the tests to be
>
> - easy to understand for as wide a range of developers as you can make,
>
> - quick,
>
> - covering regressions, and *only* regressions,
>
> - helping diagnose *and* fix regressions.
>
> In the ideal case you won't even hear when developers found your test
> helpful, and you will never, ever learn about regressions that have been
> prevented.
>
> You most frequently will hear about your tests when they did not do their
> job well.
>
> In this instance, I would have expected something like
>
> 	test_expect_lines = 3 actual &&
>
> 	head -n 1 <actual >first &&
> 	test_i18ngrep "detached HEAD" first &&
>
> 	tail -n 1 <actual >last &&
> 	grep should_be_after last
>
> instead of the "awk-ward" code above.
>
> Ciao,
> Johannes
>
> > +'
> > +
> > +test_done
> > --
> > 2.21.0
> >
> >
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] ref-filter: sort detached HEAD lines firstly
  2019-06-09  8:17 ` [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy Johannes Schindelin
  2019-06-09 16:39   ` Johannes Schindelin
@ 2019-06-10 23:49   ` Matthew DeVore
  2019-06-11  0:41     ` Jonathan Nieder
  1 sibling, 1 reply; 18+ messages in thread
From: Matthew DeVore @ 2019-06-10 23:49 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Matthew DeVore, git, avarab, git, olyatelezhnaya, samuel.maftoul,
	gitster, karthik.188, pclouds, sunshine, emilyshaffer, jrn

On Sun, Jun 09, 2019 at 10:17:19AM +0200, Johannes Schindelin wrote:
> >  	if (get_ref_atom_value(a, s->atom, &va, &err))
> >  		die("%s", err.buf);
> >  	if (get_ref_atom_value(b, s->atom, &vb, &err))
> >  		die("%s", err.buf);
> >  	strbuf_release(&err);
> >  	cmp_fn = s->ignore_case ? strcasecmp : strcmp;
> > -	if (s->version)
> > +	if (s->version) {
> >  		cmp = versioncmp(va->s, vb->s);
> > -	else if (cmp_type == FIELD_STR)
> > +	} else if (cmp_type == FIELD_STR) {
> 
> I find that it makes sense in general to suppress one's urges regarding
> introducing `{ ... }` around one-liners when the patch does not actually
> require it.
> 
> For example, I found this patch harder than necessary to read because of
> it.

I understand the desire to make the patch itself clean, and I sometimes try to
do that to a fault, but the style as I understand it is to put { } around all
if branches if only one branch requires it. Since I'm already modifying the
"else if (cmp_type == FIELD_STR)" line, I decided to put the } at the start of
the line and modify the if (s->version) line as well. So only one line was
modified "in excess." I think the temporary cost of the verbose patch is
justified to keep the style consistent in narrow code fragments.

> 
> > +		if ((a->kind & FILTER_REFS_DETACHED_HEAD) !=
> > +				(b->kind & FILTER_REFS_DETACHED_HEAD)) {
> 
> So in case that both are detached...
> 
> > +			return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;
> > +		}
> >  		cmp = cmp_fn(va->s, vb->s);
> 
> ... we compare their commit hashes, is that right? Might be worth a code
> comment.
> 

It should not be possible to have two detached head lines, so adding a comment
about that may be distracting.

> > -	else {
> > +	} else {
> 
> FWIW it would have been a much more obvious patch if it had done
> 
>  	if (s->version)
> 		[...]
> +	else if (cmp_type == FIELD_STR &&
> +		 (a->kind & FILTER_REFS_DETACHED_HEAD ||
> +		  b->kind & FILTER_REFS_DETACHED_HEAD))
> +		return (a->kind & FILTER_REFS_DETACHED_HEAD) ? -1 : 1;

But that means that if a is detached, it is always a < b, even if both are
detached? That's probably right in practice, since there should only be one
detached head, but it's jarring to read a brittle cmp function.

>  	else if (cmp_type == FIELD_STR)
> 		[...]
> 
> Maybe still worth doing.
> 
> FWIW I was *so* tempted to write
> 
> 	((a->kind ^ b->kind) & FILTER_REFS_DETACHED_HEAD)
> 
> to make this code DRYer, but then, readers not intimately familiar with
> Boolean arithmetic might not even know about the `^` operator, making the
> code harder to read than necessary, too.

I think I found a readable way to DRY:

	} else if (cmp_type == FIELD_STR) {
		const int a_detached = a->kind & FILTER_REFS_DETACHED_HEAD;

		/*
		 * When sorting by name, we should put "detached" head lines,
		 * which are all the lines in parenthesis, before all others.
		 * This usually is automatic, since "(" is before "refs/" and
		 * "remotes/", but this does not hold for zh_CN, which uses
		 * full-width parenthesis, so make the ordering explicit.
		 */
		if (a_detached != (b->kind & FILTER_REFS_DETACHED_HEAD))
			cmp = a_detached ? -1 : 1;
		else
			cmp = cmp_fn(va->s, vb->s);
	} ...

> >
> >  	if test -n "$is_IS_locale" &&
> >  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
> >  	then
> >  		# Some of the tests need the reference Icelandic locale
> >  		test_set_prereq GETTEXT_LOCALE
> >
> >  		# Exporting for t0202/test.pl
> >  		GETTEXT_LOCALE=1
> >  		export GETTEXT_LOCALE
> > @@ -53,11 +59,15 @@ then
> >  	if test -n "$is_IS_iso_locale" &&
> >  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
> >  	then
> >  		# Some of the tests need the reference Icelandic locale
> >  		test_set_prereq GETTEXT_ISO_LOCALE
> >
> >  		say "# lib-gettext: Found '$is_IS_iso_locale' as an is_IS ISO-8859-1 locale"
> >  	else
> >  		say "# lib-gettext: No is_IS ISO-8859-1 locale available"
> >  	fi
> > +
> > +	if test -z "$zh_CN_locale"; then
> > +		say "# lib-gettext: No zh_CN UTF-8 locale available"
> > +	fi
> 
> I wonder why this hunk, unlike the previous one, does not imitate the
> is_IS handling closely.

It was because I didn't have gettext set up properly and it was causing
GIT_INTERNAL_GETTEXT_SH_SCHEME to be "fallthrough", despite the actual Git
output being translated. My set-up was quite broken so I don't think our intl
utility and test code needs any fixing. I've handled this and the next version
of the patch will have that fixed. (incidentally, this was why I originally
marked my patch RFC. Also incidentally, I'm using a Mac for development since
the most powerful machine I have access to is a Mac, so I've been jumping
through some hoops to make that work.)

> 
> > diff --git a/t/t3207-branch-intl.sh b/t/t3207-branch-intl.sh
> > new file mode 100755
> > index 0000000000..9f6fcc7481
> > --- /dev/null
> > +++ b/t/t3207-branch-intl.sh
> > @@ -0,0 +1,38 @@
> > +#!/bin/sh
> > +
> > +test_description='git branch internationalization tests'
> > +
> > +. ./lib-gettext.sh
> > +
> > +test_expect_success 'init repo' '
> > +	git init r1 &&
> 
> Why?

You mean why make a test repo?

> 
> > +	touch r1/foo &&
> > +	git -C r1 add foo &&
> > +	git -C r1 commit -m foo
> > +'
> 
> Why not simply `test_commit foo`?

Good idea, I'll use that.

> 
> > +test_expect_success 'detached head sorts before other branches' '
> > +	# Ref sorting logic should put detached heads before the other
> > +	# branches, but this is not automatic when a branch name sorts
> > +	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
> > +	# The latter case is nearly guaranteed for the Chinese locale.
> > +
> > +	git -C r1 checkout HEAD^{} -- &&
> > +	git -C r1 branch !should_be_after_detached HEAD &&
> 
> I am not sure that `!` is a wise choice, as it might not be a legal file
> name character everywhere. A `.` or `-` might make more sense.

The ! is actually meaningless, as I have come to observed, since the actual ref
name used for comparison is not "!should_be_after_detached" but
"refs/heads/!should_be_after_detached". So I'll remove it.

> 
> > +	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
> > +		git -C r1 branch >actual &&
> > +	git -C r1 checkout - &&
> 
> Why call `checkout` after `branch`? That's unnecessary, we do not verify
> anything after that call.

It's to get the repo into a neutral state in case an additional testcase is
added in the future.

> 
> > +	awk "
> > +	# We need full-width or half-width parens on the first line.
> > +	NR == 1 && (/[(].*[)]/ || /\xef\xbc\x88.*\xef\xbc\x89/) {
> > +		found_head = 1;
> > +	}
> > +	/!should_be_after_detached/ {
> > +		found_control_branch = 1;
> > +	}
> > +	END { exit !found_head || !found_control_branch }
> > +	" actual
> 
> This might look beautiful for a fan of `awk`. For the vast majority of us,
> this is not a good idea.
> 
> Remember, you do *not* write those tests for your own pleasure, you do
> *not* write those tests in order to help you catch problems while you
> develop your patches, you do *not* develop these tests in order to just
> catch future breakages.
> 
> You *do* write those tests for *other* developers who you try to help in
> preventing introducing regressions.
> 
> As such, you *want* the tests to be
> 
> - easy to understand for as wide a range of developers as you can make,
> 
> - quick,
> 
> - covering regressions, and *only* regressions,
> 
> - helping diagnose *and* fix regressions.
> 
> In the ideal case you won't even hear when developers found your test
> helpful, and you will never, ever learn about regressions that have been
> prevented.
> 
> You most frequently will hear about your tests when they did not do their
> job well.
> 
> In this instance, I would have expected something like
> 
> 	test_expect_lines = 3 actual &&
> 
> 	head -n 1 <actual >first &&
> 	test_i18ngrep "detached HEAD" first &&

Tangential to your point, but the test_i18ngrep actually won't work since all
it does is *not* grep when the text is translated. But I want the text to use
the genuine zh_CN translation.

> 
> 	tail -n 1 <actual >last &&
> 	grep should_be_after last
> 
> instead of the "awk-ward" code above.

All fair points, and I may have been carried away when I wrote the awk code. I
liked using awk since it matched my mental model at the time, which was
procedural, as opposed to your proposed pure sh implementation, which is more
declarative.

Here is the new version of the test:

test_expect_success GETTEXT_ZH_LOCALE 'detached head sorts before branches' '
	# Ref sorting logic should put detached heads before the other
	# branches, but this is not automatic when a branch name sorts
	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
	# The latter case is nearly guaranteed for the Chinese locale.

	git -C r1 checkout HEAD^{} -- &&
	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
		git -C r1 branch >actual &&
	git -C r1 checkout - &&

	head -n 1 actual >first &&
	# The first line should be enclosed by full-width parenthesis.
	grep '$'\xef\xbc\x88.*\xef\xbc\x89'' first &&
	grep master actual
'

> 
> Ciao,
> Johannes
>

Thank you for the thoughtful feedback.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] ref-filter: sort detached HEAD lines firstly
  2019-06-10 23:49   ` [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
@ 2019-06-11  0:41     ` Jonathan Nieder
  2019-06-11 16:48       ` Matthew DeVore
  2019-06-11 19:53       ` Junio C Hamano
  0 siblings, 2 replies; 18+ messages in thread
From: Jonathan Nieder @ 2019-06-11  0:41 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: Johannes Schindelin, Matthew DeVore, git, avarab, git,
	olyatelezhnaya, samuel.maftoul, gitster, karthik.188, pclouds,
	sunshine, emilyshaffer

Hi,

Matthew DeVore wrote:
> On Sun, Jun 09, 2019 at 10:17:19AM +0200, Johannes Schindelin wrote:

>> I find that it makes sense in general to suppress one's urges regarding
>> introducing `{ ... }` around one-liners when the patch does not actually
>> require it.
>>
>> For example, I found this patch harder than necessary to read because of
>> it.
>
> I understand the desire to make the patch itself clean, and I sometimes try to
> do that to a fault, but the style as I understand it is to put { } around all
> if branches if only one branch requires it. Since I'm already modifying the
> "else if (cmp_type == FIELD_STR)" line, I decided to put the } at the start of
> the line and modify the if (s->version) line as well. So only one line was
> modified "in excess." I think the temporary cost of the verbose patch is
> justified to keep the style consistent in narrow code fragments.

Git seems to be inconsistent about this.  Documentation/CodingGuidelines
says

        - When there are multiple arms to a conditional and some of them
          require braces, enclose even a single line block in braces for
          consistency. E.g.:

so you have some cover from there (and it matches what I'm used to,
too). :)

[...]
>>> +	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
>>> +		git -C r1 branch >actual &&
>>> +	git -C r1 checkout - &&
>>
>> Why call `checkout` after `branch`? That's unnecessary, we do not verify
>> anything after that call.
>
> It's to get the repo into a neutral state in case an additional testcase is
> added in the future.

For this kind of thing, we tend to use test_when_finished so that the
test ends up in a clean state even if it fails.

[...]
> test_expect_success GETTEXT_ZH_LOCALE 'detached head sorts before branches' '
> 	# Ref sorting logic should put detached heads before the other
> 	# branches, but this is not automatic when a branch name sorts
> 	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
> 	# The latter case is nearly guaranteed for the Chinese locale.
>
> 	git -C r1 checkout HEAD^{} -- &&
> 	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
> 		git -C r1 branch >actual &&
> 	git -C r1 checkout - &&
>
> 	head -n 1 actual >first &&
> 	# The first line should be enclosed by full-width parenthesis.
> 	grep '$'\xef\xbc\x88.*\xef\xbc\x89'' first &&

nit: older shells do not know how to do $'\x01' interpolation.
Probably best to use the raw UTF-8 directly here (it will be more
readable anyway).

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] ref-filter: sort detached HEAD lines firstly
  2019-06-11  0:41     ` Jonathan Nieder
@ 2019-06-11 16:48       ` Matthew DeVore
  2019-06-11 19:53       ` Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Matthew DeVore @ 2019-06-11 16:48 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Johannes Schindelin, Matthew DeVore, git, avarab, git,
	olyatelezhnaya, samuel.maftoul, gitster, karthik.188, pclouds,
	sunshine, emilyshaffer

On Mon, Jun 10, 2019 at 05:41:06PM -0700, Jonathan Nieder wrote:
> Git seems to be inconsistent about this.  Documentation/CodingGuidelines
> says
> 
>         - When there are multiple arms to a conditional and some of them
>           require braces, enclose even a single line block in braces for
>           consistency. E.g.:
> 
> so you have some cover from there (and it matches what I'm used to,
> too). :)

Thanks for finding that.

> 
> [...]
> >>> +	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
> >>> +		git -C r1 branch >actual &&
> >>> +	git -C r1 checkout - &&
> >>
> >> Why call `checkout` after `branch`? That's unnecessary, we do not verify
> >> anything after that call.
> >
> > It's to get the repo into a neutral state in case an additional testcase is
> > added in the future.
> 
> For this kind of thing, we tend to use test_when_finished so that the
> test ends up in a clean state even if it fails.

Done. That will be used in the next roll-up.

> > 	head -n 1 actual >first &&
> > 	# The first line should be enclosed by full-width parenthesis.
> > 	grep '$'\xef\xbc\x88.*\xef\xbc\x89'' first &&
> 
> nit: older shells do not know how to do $'\x01' interpolation.
> Probably best to use the raw UTF-8 directly here (it will be more
> readable anyway).

Good point. I suppose we don't have to worry about dev's editors screwing up
encoding since modern editors make this kind of thing easy to configure (and I
suspect that all sane editors use UTF-8 as the default or at least won't mangle
it...)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 0/1] Sort detached HEAD lines firstly
  2019-06-06 21:38 [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
  2019-06-09  8:17 ` [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy Johannes Schindelin
@ 2019-06-11 18:28 ` " Matthew DeVore
  2019-06-11 18:28   ` [PATCH v2 1/1] ref-filter: sort " Matthew DeVore
  2019-06-18 22:29 ` [PATCH v3 0/1] Sort detached heads line firstly Matthew DeVore
  2 siblings, 1 reply; 18+ messages in thread
From: Matthew DeVore @ 2019-06-11 18:28 UTC (permalink / raw)
  To: git
  Cc: Matthew DeVore, avarab, git, olyatelezhnaya, samuel.maftoul,
	gitster, Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

This version improves upon the previous:

 - fixes a bug where reverse-sorting didn't work with (HEAD detached) lines, and
   adds a test
 - makes implementation code a little DRYer
 - uses test_when_finished where applicable
 - makes zh_CN locale detection look more like the is_IS locale detection

Thanks,

Matthew DeVore (1):
  ref-filter: sort detached HEAD lines firstly

 ref-filter.c           | 20 ++++++++++++++++----
 t/lib-gettext.sh       | 22 +++++++++++++++++++---
 t/t3207-branch-intl.sh | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 7 deletions(-)
 create mode 100755 t/t3207-branch-intl.sh

-- 
2.21.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-11 18:28 ` [PATCH v2 0/1] Sort " Matthew DeVore
@ 2019-06-11 18:28   ` " Matthew DeVore
  2019-06-11 20:10     ` Junio C Hamano
  2019-06-12 19:51     ` Johannes Schindelin
  0 siblings, 2 replies; 18+ messages in thread
From: Matthew DeVore @ 2019-06-11 18:28 UTC (permalink / raw)
  To: git
  Cc: Matthew DeVore, avarab, git, olyatelezhnaya, samuel.maftoul,
	gitster, Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

Before this patch, "git branch" would put "(HEAD detached...)" and "(no
branch, rebasing...)" lines before all the other branches *in most
cases* and only because of the fact that "(" is a low codepoint. This
would not hold in the Chinese locale, which uses a full-width "(" symbol
(codepoint FF08). This meant that the detached HEAD line would appear
after all local refs and even after the remote refs if there were any.

Deliberately sort the detached HEAD refs before other refs when sorting
by refname rather than rely on codepoint subtleties.

Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Helped-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 ref-filter.c           | 20 ++++++++++++++++----
 t/lib-gettext.sh       | 22 +++++++++++++++++++---
 t/t3207-branch-intl.sh | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 7 deletions(-)
 create mode 100755 t/t3207-branch-intl.sh

diff --git a/ref-filter.c b/ref-filter.c
index 8500671bc6..056d21d666 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2157,25 +2157,37 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	cmp_type cmp_type = used_atom[s->atom].type;
 	int (*cmp_fn)(const char *, const char *);
 	struct strbuf err = STRBUF_INIT;
 
 	if (get_ref_atom_value(a, s->atom, &va, &err))
 		die("%s", err.buf);
 	if (get_ref_atom_value(b, s->atom, &vb, &err))
 		die("%s", err.buf);
 	strbuf_release(&err);
 	cmp_fn = s->ignore_case ? strcasecmp : strcmp;
-	if (s->version)
+	if (s->version) {
 		cmp = versioncmp(va->s, vb->s);
-	else if (cmp_type == FIELD_STR)
-		cmp = cmp_fn(va->s, vb->s);
-	else {
+	} else if (cmp_type == FIELD_STR) {
+		const int a_detached = a->kind & FILTER_REFS_DETACHED_HEAD;
+
+		/*
+		 * When sorting by name, we should put "detached" head lines,
+		 * which are all the lines in parenthesis, before all others.
+		 * This usually is automatic, since "(" is before "refs/" and
+		 * "remotes/", but this does not hold for zh_CN, which uses
+		 * full-width parenthesis, so make the ordering explicit.
+		 */
+		if (a_detached != (b->kind & FILTER_REFS_DETACHED_HEAD))
+			cmp = a_detached ? -1 : 1;
+		else
+			cmp = cmp_fn(va->s, vb->s);
+	} else {
 		if (va->value < vb->value)
 			cmp = -1;
 		else if (va->value == vb->value)
 			cmp = cmp_fn(a->refname, b->refname);
 		else
 			cmp = 1;
 	}
 
 	return (s->reverse) ? -cmp : cmp;
 }
diff --git a/t/lib-gettext.sh b/t/lib-gettext.sh
index 2139b427ca..1adf1d4c31 100644
--- a/t/lib-gettext.sh
+++ b/t/lib-gettext.sh
@@ -25,23 +25,29 @@ then
 		p
 		q
 	}')
 	# is_IS.ISO8859-1 on Solaris and FreeBSD, is_IS.iso88591 on Debian
 	is_IS_iso_locale=$(locale -a 2>/dev/null |
 		sed -n '/^is_IS\.[iI][sS][oO]8859-*1$/{
 		p
 		q
 	}')
 
-	# Export them as an environment variable so the t0202/test.pl Perl
-	# test can use it too
-	export is_IS_locale is_IS_iso_locale
+	zh_CN_locale=$(locale -a 2>/dev/null |
+		sed -n '/^zh_CN\.[uU][tT][fF]-*8$/{
+		p
+		q
+	}')
+
+	# Export them as environment variables so other tests can use them
+	# too
+	export is_IS_locale is_IS_iso_locale zh_CN_locale
 
 	if test -n "$is_IS_locale" &&
 		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
 	then
 		# Some of the tests need the reference Icelandic locale
 		test_set_prereq GETTEXT_LOCALE
 
 		# Exporting for t0202/test.pl
 		GETTEXT_LOCALE=1
 		export GETTEXT_LOCALE
@@ -53,11 +59,21 @@ then
 	if test -n "$is_IS_iso_locale" &&
 		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
 	then
 		# Some of the tests need the reference Icelandic locale
 		test_set_prereq GETTEXT_ISO_LOCALE
 
 		say "# lib-gettext: Found '$is_IS_iso_locale' as an is_IS ISO-8859-1 locale"
 	else
 		say "# lib-gettext: No is_IS ISO-8859-1 locale available"
 	fi
+
+	if test -n "$zh_CN_locale" &&
+		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
+	then
+		test_set_prereq GETTEXT_ZH_LOCALE
+
+		say "# lib-gettext: Found '$zh_CN_locale' as a zh_CN UTF-8 locale"
+	else
+		say "# lib-gettext: No zh_CN UTF-8 locale available"
+	fi
 fi
diff --git a/t/t3207-branch-intl.sh b/t/t3207-branch-intl.sh
new file mode 100755
index 0000000000..a46538188c
--- /dev/null
+++ b/t/t3207-branch-intl.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+test_description='git branch internationalization tests'
+
+. ./lib-gettext.sh
+
+test_expect_success 'init repo' '
+	git init r1 &&
+	test_commit -C r1 first
+'
+
+test_expect_success GETTEXT_ZH_LOCALE 'detached head sorts before branches' '
+	# Ref sorting logic should put detached heads before the other
+	# branches, but this is not automatic when a branch name sorts
+	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
+	# The latter case is nearly guaranteed for the Chinese locale.
+
+	test_when_finished "git -C r1 checkout master" &&
+
+	git -C r1 checkout HEAD^{} -- &&
+	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
+		git -C r1 branch >actual &&
+
+	head -n 1 actual >first &&
+	# The first line should be enclosed by full-width parenthesis.
+	grep "(.*)" first &&
+	grep master actual
+'
+
+test_expect_success 'detached head honors reverse sorting' '
+	test_when_finished "git -C r1 checkout master" &&
+
+	git -C r1 checkout HEAD^{} -- &&
+	git -C r1 branch --sort=-refname >actual &&
+
+	head -n 1 actual >first &&
+	grep master first &&
+	test_i18ngrep "HEAD detached" actual
+'
+
+test_done
-- 
2.21.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH] ref-filter: sort detached HEAD lines firstly
  2019-06-11  0:41     ` Jonathan Nieder
  2019-06-11 16:48       ` Matthew DeVore
@ 2019-06-11 19:53       ` Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2019-06-11 19:53 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Matthew DeVore, Johannes Schindelin, Matthew DeVore, git, avarab,
	git, olyatelezhnaya, samuel.maftoul, karthik.188, pclouds,
	sunshine, emilyshaffer

Jonathan Nieder <jrnieder@gmail.com> writes:

> Git seems to be inconsistent about this.  Documentation/CodingGuidelines
> says
>
>         - When there are multiple arms to a conditional and some of them
>           require braces, enclose even a single line block in braces for
>           consistency. E.g.:
>
> so you have some cover from there (and it matches what I'm used to,
> too). :)

Yup, it took us for quite some time before we settled on that rule
and wrote it down, so there are some lines that predate it *and*
have survived.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-11 18:28   ` [PATCH v2 1/1] ref-filter: sort " Matthew DeVore
@ 2019-06-11 20:10     ` Junio C Hamano
  2019-06-12 21:09       ` Junio C Hamano
  2019-06-12 19:51     ` Johannes Schindelin
  1 sibling, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2019-06-11 20:10 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, avarab, git, olyatelezhnaya, samuel.maftoul,
	Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

Matthew DeVore <matvore@google.com> writes:

> -	if (s->version)
> +	if (s->version) {
>  		cmp = versioncmp(va->s, vb->s);
> -	else if (cmp_type == FIELD_STR)
> -		cmp = cmp_fn(va->s, vb->s);
> -	else {

Ah, this must be the patch noise Jonathan was (half) complaining
about.  It does make it a bit distracting to read the patch but the
resulting code is of course easier to follow ;-).

> +	} else if (cmp_type == FIELD_STR) {
> +		const int a_detached = a->kind & FILTER_REFS_DETACHED_HEAD;
> +
> +		/*
> +		 * When sorting by name, we should put "detached" head lines,
> +		 * which are all the lines in parenthesis, before all others.
> +		 * This usually is automatic, since "(" is before "refs/" and
> +		 * "remotes/", but this does not hold for zh_CN, which uses
> +		 * full-width parenthesis, so make the ordering explicit.
> +		 */
> +		if (a_detached != (b->kind & FILTER_REFS_DETACHED_HEAD))
> +			cmp = a_detached ? -1 : 1;

So, comparing a detached and an undetached ones, the detached side
always sorts lower.  Good.  And ...

> +		else
> +			cmp = cmp_fn(va->s, vb->s);

... otherwise we compare the string using the given function.

Sounds sensible.  Will queue.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-11 18:28   ` [PATCH v2 1/1] ref-filter: sort " Matthew DeVore
  2019-06-11 20:10     ` Junio C Hamano
@ 2019-06-12 19:51     ` Johannes Schindelin
  2019-06-13 16:58       ` Jeff King
  1 sibling, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2019-06-12 19:51 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, avarab, git, olyatelezhnaya, samuel.maftoul, gitster,
	karthik.188, pclouds, sunshine, emilyshaffer, jrnieder

[-- Attachment #1: Type: text/plain, Size: 8112 bytes --]

Hi Matthew,

On Tue, 11 Jun 2019, Matthew DeVore wrote:

> diff --git a/ref-filter.c b/ref-filter.c
> index 8500671bc6..056d21d666 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -2157,25 +2157,37 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
>  	cmp_type cmp_type = used_atom[s->atom].type;
>  	int (*cmp_fn)(const char *, const char *);
>  	struct strbuf err = STRBUF_INIT;
>
>  	if (get_ref_atom_value(a, s->atom, &va, &err))
>  		die("%s", err.buf);
>  	if (get_ref_atom_value(b, s->atom, &vb, &err))
>  		die("%s", err.buf);
>  	strbuf_release(&err);
>  	cmp_fn = s->ignore_case ? strcasecmp : strcmp;
> -	if (s->version)
> +	if (s->version) {
>  		cmp = versioncmp(va->s, vb->s);
> -	else if (cmp_type == FIELD_STR)
> -		cmp = cmp_fn(va->s, vb->s);
> -	else {
> +	} else if (cmp_type == FIELD_STR) {

I still think that this slipped-in `{` makes this patch harder to read
than necessary.

Your argument that you introduce the first curlies in an `else` block does
not hold, as the removed `else {` line above demonstrates quite clearly.

But you seem dead set to do it nevertheless, so I'll save my breath.

> +		const int a_detached = a->kind & FILTER_REFS_DETACHED_HEAD;
> +
> +		/*
> +		 * When sorting by name, we should put "detached" head lines,
> +		 * which are all the lines in parenthesis, before all others.
> +		 * This usually is automatic, since "(" is before "refs/" and
> +		 * "remotes/", but this does not hold for zh_CN, which uses
> +		 * full-width parenthesis, so make the ordering explicit.
> +		 */
> +		if (a_detached != (b->kind & FILTER_REFS_DETACHED_HEAD))
> +			cmp = a_detached ? -1 : 1;
> +		else
> +			cmp = cmp_fn(va->s, vb->s);
> +	} else {
>  		if (va->value < vb->value)
>  			cmp = -1;
>  		else if (va->value == vb->value)
>  			cmp = cmp_fn(a->refname, b->refname);
>  		else
>  			cmp = 1;
>  	}
>
>  	return (s->reverse) ? -cmp : cmp;
>  }
> diff --git a/t/lib-gettext.sh b/t/lib-gettext.sh
> index 2139b427ca..1adf1d4c31 100644
> --- a/t/lib-gettext.sh
> +++ b/t/lib-gettext.sh
> @@ -25,23 +25,29 @@ then
>  		p
>  		q
>  	}')
>  	# is_IS.ISO8859-1 on Solaris and FreeBSD, is_IS.iso88591 on Debian
>  	is_IS_iso_locale=$(locale -a 2>/dev/null |
>  		sed -n '/^is_IS\.[iI][sS][oO]8859-*1$/{
>  		p
>  		q
>  	}')
>
> -	# Export them as an environment variable so the t0202/test.pl Perl
> -	# test can use it too
> -	export is_IS_locale is_IS_iso_locale
> +	zh_CN_locale=$(locale -a 2>/dev/null |
> +		sed -n '/^zh_CN\.[uU][tT][fF]-*8$/{
> +		p
> +		q
> +	}')
> +
> +	# Export them as environment variables so other tests can use them
> +	# too
> +	export is_IS_locale is_IS_iso_locale zh_CN_locale
>
>  	if test -n "$is_IS_locale" &&
>  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
>  	then
>  		# Some of the tests need the reference Icelandic locale
>  		test_set_prereq GETTEXT_LOCALE
>
>  		# Exporting for t0202/test.pl
>  		GETTEXT_LOCALE=1
>  		export GETTEXT_LOCALE
> @@ -53,11 +59,21 @@ then
>  	if test -n "$is_IS_iso_locale" &&
>  		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
>  	then
>  		# Some of the tests need the reference Icelandic locale
>  		test_set_prereq GETTEXT_ISO_LOCALE
>
>  		say "# lib-gettext: Found '$is_IS_iso_locale' as an is_IS ISO-8859-1 locale"
>  	else
>  		say "# lib-gettext: No is_IS ISO-8859-1 locale available"
>  	fi
> +
> +	if test -n "$zh_CN_locale" &&
> +		test $GIT_INTERNAL_GETTEXT_SH_SCHEME != "fallthrough"
> +	then
> +		test_set_prereq GETTEXT_ZH_LOCALE
> +
> +		say "# lib-gettext: Found '$zh_CN_locale' as a zh_CN UTF-8 locale"
> +	else
> +		say "# lib-gettext: No zh_CN UTF-8 locale available"
> +	fi
>  fi
> diff --git a/t/t3207-branch-intl.sh b/t/t3207-branch-intl.sh
> new file mode 100755
> index 0000000000..a46538188c
> --- /dev/null
> +++ b/t/t3207-branch-intl.sh
> @@ -0,0 +1,41 @@
> +#!/bin/sh
> +
> +test_description='git branch internationalization tests'
> +
> +. ./lib-gettext.sh
> +
> +test_expect_success 'init repo' '
> +	git init r1 &&
> +	test_commit -C r1 first
> +'

I still see absolutely no need for initializing `r1`. Every test script in
Git's test suite starts out with a fully initialized repository, no `git
init` necessary. Therefore, this test case seems to have an unnecessary
`git init` and multiple unnecessary `-C r1` options that make the script
quite noisy.

I mean, you initialize that `r1`, work on it exclusively, and completely
ignore the repository that has been initialized in `.git` for you.

> +test_expect_success GETTEXT_ZH_LOCALE 'detached head sorts before branches' '
> +	# Ref sorting logic should put detached heads before the other
> +	# branches, but this is not automatic when a branch name sorts
> +	# lexically before "(" or the full-width "(" (Unicode codepoint FF08).
> +	# The latter case is nearly guaranteed for the Chinese locale.
> +
> +	test_when_finished "git -C r1 checkout master" &&
> +
> +	git -C r1 checkout HEAD^{} -- &&

`HEAD^0` is a much more canonical way to say this. However, if you want
your test case to be easy to understand (and that is your goal, too,
right, not only mine?), you will instead use

	git checkout --detach

> +	LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale \
> +		git -C r1 branch >actual &&
> +
> +	head -n 1 actual >first &&
> +	# The first line should be enclosed by full-width parenthesis.
> +	grep "(.*)" first &&

I wonder whether it is a good idea to pretend that we can pass arbitrary
byte sequences to `grep`, independent of the current locale. On Windows,
this does not hold true, for example.

It would probably make more sense to store a support file in t/t3207/,
much like it is done in t3900.

And once you do that, you can simply `test_cmp t3207/first first`. No
need to `grep` for `master` in addition:

> +	grep master actual
> +'
> +
> +test_expect_success 'detached head honors reverse sorting' '
> +	test_when_finished "git -C r1 checkout master" &&

Hmm. I see you also did that in the previous test case, but since you
immediately detach the HEAD, I have to ask:

- why? Why do you insist on switching back to `master` after the test case
  finished?
- Why even bother to call `git checkout --detach` in anything but the very
  first test case, whose purpose it is to set things up for the subsequent
  test cases, after all?

> +
> +	git -C r1 checkout HEAD^{} -- &&
> +	git -C r1 branch --sort=-refname >actual &&
> +
> +	head -n 1 actual >first &&
> +	grep master first &&
> +	test_i18ngrep "HEAD detached" actual

Funny, reading the test case's title, I would have expected to read
instead:

	echo "* HEAD detached" >expect &&
	tail -n 1 actual >last &&
	test_cmp expect last

In all, the test script should read more like this:

	test_expect_success 'setup' '
		test_commit first &&
		git checkout --detach
	'

	# [... long comment here, does not need to be hidden and indented
	# inside...]
	test_expect_success GETTEXT_ZH_LOCALE 'detached HEAD sorts first' '
		LC_ALL=$zh_CN_locale LC_MESSAGES=$zh_CN_locale git branch >actual &&

		head -n 1 <actual >first &&
		test_cmp "$TEST_DIRECTORY/../t3207/first" first
	'

	test_expect_success 'detached HEAD reverse-sorts last' '
		git branch --sort=-refname >actual &&

		echo "* HEAD detached" >expect &&
		tail -n 1 actual >last &&
		test_cmp expect last
	'

It is quite possible that this can be simplified even further, i.e. made
even easier to understand for developers in the unfortunate situation of
having to debug a regression (which is the entire goal of a well-written
regression test: to help, rather than just to force, developers to debug
regressions).

Granted, the simpler form might look like it took less effort to write
than the complicated one. People with some experience in software
development will understand the opposite to be true, though.

Ciao,
Dscho

> +'
> +
> +test_done
> --
> 2.21.0
>
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-11 20:10     ` Junio C Hamano
@ 2019-06-12 21:09       ` Junio C Hamano
  2019-06-12 21:21         ` Matthew DeVore
  2019-06-13  1:56         ` Matthew DeVore
  0 siblings, 2 replies; 18+ messages in thread
From: Junio C Hamano @ 2019-06-12 21:09 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, avarab, git, olyatelezhnaya, samuel.maftoul,
	Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

Junio C Hamano <gitster@pobox.com> writes:

>> +		/*
>> +		 * When sorting by name, we should put "detached" head lines,
>> +		 * which are all the lines in parenthesis, before all others.
>> +		 * This usually is automatic, since "(" is before "refs/" and
>> +		 * "remotes/", but this does not hold for zh_CN, which uses
>> +		 * full-width parenthesis, so make the ordering explicit.
>> +		 */
>> +		if (a_detached != (b->kind & FILTER_REFS_DETACHED_HEAD))
>> +			cmp = a_detached ? -1 : 1;
>
> So, comparing a detached and an undetached ones, the detached side
> always sorts lower.  Good.  And ...
>
>> +		else
>> +			cmp = cmp_fn(va->s, vb->s);
>
> ... otherwise we compare the string using the given function.
>
> Sounds sensible.  Will queue.

Stepping back a bit, why are we even allowing the surrounding ()
pair to be futzed by the translators?

IOW, shouldn't our code more like this from the beginning, with or
without Chinese translation?

With a bit more work, we may even be able to lose "make sure this
matches the one in wt-status.c" comment as losing the leading '('
would take us one step closer to have an identical string here as we
have in wt-status.c

 ref-filter.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 8500671bc6..7e4705fcb2 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1459,20 +1459,22 @@ char *get_head_description(void)
 		strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
 			    state.branch);
 	else if (state.detached_from) {
+		strbuf_addch(&desc, '(');
 		if (state.detached_at)
 			/*
 			 * TRANSLATORS: make sure this matches "HEAD
 			 * detached at " in wt-status.c
 			 */
-			strbuf_addf(&desc, _("(HEAD detached at %s)"),
-				state.detached_from);
+			strbuf_addf(&desc, _("HEAD detached at %s"),
+				    state.detached_from);
 		else
 			/*
 			 * TRANSLATORS: make sure this matches "HEAD
 			 * detached from " in wt-status.c
 			 */
-			strbuf_addf(&desc, _("(HEAD detached from %s)"),
+			strbuf_addf(&desc, _("HEAD detached from %s"),
 				state.detached_from);
+		strbuf_addch(&desc, ')');
 	}
 	else
 		strbuf_addstr(&desc, _("(no branch)"));



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-12 21:09       ` Junio C Hamano
@ 2019-06-12 21:21         ` Matthew DeVore
  2019-06-13  1:56         ` Matthew DeVore
  1 sibling, 0 replies; 18+ messages in thread
From: Matthew DeVore @ 2019-06-12 21:21 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, avarab, git, olyatelezhnaya, samuel.maftoul,
	Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

On Wed, Jun 12, 2019 at 02:09:53PM -0700, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> >> +		/*
> >> +		 * When sorting by name, we should put "detached" head lines,
> >> +		 * which are all the lines in parenthesis, before all others.
> >> +		 * This usually is automatic, since "(" is before "refs/" and
> >> +		 * "remotes/", but this does not hold for zh_CN, which uses
> >> +		 * full-width parenthesis, so make the ordering explicit.
> >> +		 */
> >> +		if (a_detached != (b->kind & FILTER_REFS_DETACHED_HEAD))
> >> +			cmp = a_detached ? -1 : 1;
> >
> > So, comparing a detached and an undetached ones, the detached side
> > always sorts lower.  Good.  And ...
> >
> >> +		else
> >> +			cmp = cmp_fn(va->s, vb->s);
> >
> > ... otherwise we compare the string using the given function.
> >
> > Sounds sensible.  Will queue.
> 
> Stepping back a bit, why are we even allowing the surrounding ()
> pair to be futzed by the translators?

I was thinking about removing () from the translated strings, but decided
against it since there are a lot of full-width parenthesis in the translated
strings already:

$ cd po; git grep -B 1 'msgstr.*('
... 246 matches in zh_CN ...

and it seems strange to force only a few pairs of parens to be half-width to
make the code simpler. I don't know if that's a great argument, since it is
somewhat aesthetic. I would have liked half-width parens more if it were
closing off purely ASCII text. But it is in fact surrounding Chinese text:

$ git branch
* (头指针分离于 cf0246a5cc)

> 
> IOW, shouldn't our code more like this from the beginning, with or
> without Chinese translation?
> 
> With a bit more work, we may even be able to lose "make sure this
> matches the one in wt-status.c" comment as losing the leading '('
> would take us one step closer to have an identical string here as we
> have in wt-status.c
> 
>  ref-filter.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/ref-filter.c b/ref-filter.c
> index 8500671bc6..7e4705fcb2 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -1459,20 +1459,22 @@ char *get_head_description(void)
>  		strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
>  			    state.branch);
>  	else if (state.detached_from) {
> +		strbuf_addch(&desc, '(');
>  		if (state.detached_at)
>  			/*
>  			 * TRANSLATORS: make sure this matches "HEAD
>  			 * detached at " in wt-status.c
>  			 */
> -			strbuf_addf(&desc, _("(HEAD detached at %s)"),
> -				state.detached_from);
> +			strbuf_addf(&desc, _("HEAD detached at %s"),
> +				    state.detached_from);
>  		else
>  			/*
>  			 * TRANSLATORS: make sure this matches "HEAD
>  			 * detached from " in wt-status.c
>  			 */
> -			strbuf_addf(&desc, _("(HEAD detached from %s)"),
> +			strbuf_addf(&desc, _("HEAD detached from %s"),
>  				state.detached_from);
> +		strbuf_addch(&desc, ')');
>  	}
>  	else
>  		strbuf_addstr(&desc, _("(no branch)"));
> 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-12 21:09       ` Junio C Hamano
  2019-06-12 21:21         ` Matthew DeVore
@ 2019-06-13  1:56         ` Matthew DeVore
  1 sibling, 0 replies; 18+ messages in thread
From: Matthew DeVore @ 2019-06-13  1:56 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Matthew DeVore, git, avarab, git, olyatelezhnaya, samuel.maftoul,
	Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

On Wed, Jun 12, 2019 at 02:09:53PM -0700, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> Stepping back a bit, why are we even allowing the surrounding ()
> pair to be futzed by the translators?
> 
> IOW, shouldn't our code more like this from the beginning, with or
> without Chinese translation?
> 
> With a bit more work, we may even be able to lose "make sure this
> matches the one in wt-status.c" comment as losing the leading '('
> would take us one step closer to have an identical string here as we
> have in wt-status.c

I think my previous e-mail didn't make it to the public list, maybe since it
contained non-ASCII text. The gist of that mail was that we have full-width
parens in various zh_CN strings so it seems hacky to make just this one be
half-width for the sake of code simplicity.

Giving this a bit more thought now, perhaps the fact that we want to be in-sync
with the string in wt-status.c, combined with the fact that we already have "*"
as half-width metacharacter in the "git branch" output, is a good enough excuse
to drop "()" out of the translatable string, as your patch does.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-12 19:51     ` Johannes Schindelin
@ 2019-06-13 16:58       ` Jeff King
  0 siblings, 0 replies; 18+ messages in thread
From: Jeff King @ 2019-06-13 16:58 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Matthew DeVore, git, avarab, git, olyatelezhnaya, samuel.maftoul,
	gitster, karthik.188, pclouds, sunshine, emilyshaffer, jrnieder

On Wed, Jun 12, 2019 at 09:51:33PM +0200, Johannes Schindelin wrote:

> > +	head -n 1 actual >first &&
> > +	# The first line should be enclosed by full-width parenthesis.
> > +	grep "(.*)" first &&
> 
> I wonder whether it is a good idea to pretend that we can pass arbitrary
> byte sequences to `grep`, independent of the current locale. On Windows,
> this does not hold true, for example.
> 
> It would probably make more sense to store a support file in t/t3207/,
> much like it is done in t3900.
> 
> And once you do that, you can simply `test_cmp t3207/first first`. No
> need to `grep` for `master` in addition:

I was just writing a similar response in another part of the thread, and
found this. :)

In addition to grep portability problems, IMHO the source with the raw
UTF-8 characters is harder to read. Even if your editor and terminal
support UTF-8, people without the right fonts will just get a bunch of
empty boxes. And when debugging, you often care about the raw bytes
anyway (e.g., when there are multiple representations of the same
glyph).

Adding a support file is fine, but for small cases like this, it may be
easier to do:

  printf '\357\274\210...' >expect

but note that this _must_ be octal, not hex, as many versions of printf
only handle the former.

-Peff

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 0/1] Sort detached heads line firstly
  2019-06-06 21:38 [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
  2019-06-09  8:17 ` [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy Johannes Schindelin
  2019-06-11 18:28 ` [PATCH v2 0/1] Sort " Matthew DeVore
@ 2019-06-18 22:29 ` Matthew DeVore
  2019-06-18 22:29   ` [PATCH v3 1/1] ref-filter: sort detached HEAD lines firstly Matthew DeVore
  2 siblings, 1 reply; 18+ messages in thread
From: Matthew DeVore @ 2019-06-18 22:29 UTC (permalink / raw)
  To: git
  Cc: Matthew DeVore, avarab, git, olyatelezhnaya, samuel.maftoul,
	gitster, Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

In the interest of simplifying the code and the job of translators, I've
decided to simply remove the ( from the translatable strings as suggested by
Junio in a response to v2 of this patchset.

I thought that a regression test would be a bit overkill for a fix of this
nature. Instead, I've added a cautionary comment to not add the ( back to the
translatable string.

Thank you,

Matthew DeVore (1):
  ref-filter: sort detached HEAD lines firstly

 ref-filter.c | 32 ++++++++++++++++----------------
 wt-status.c  |  4 ++--
 wt-status.h  |  3 +++
 3 files changed, 21 insertions(+), 18 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-18 22:29 ` [PATCH v3 0/1] Sort detached heads line firstly Matthew DeVore
@ 2019-06-18 22:29   ` Matthew DeVore
  2019-06-19 15:29     ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Matthew DeVore @ 2019-06-18 22:29 UTC (permalink / raw)
  To: git
  Cc: Matthew DeVore, avarab, git, olyatelezhnaya, samuel.maftoul,
	gitster, Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

Before this patch, "git branch" would put "(HEAD detached...)" and "(no
branch, rebasing...)" lines before all the other branches *in most
cases* except for when using Chinese-language messages. zh_CN generally
uses a full-width "(" symbol (codepoint FF08) to match the full-width
proportions of Chinese characters, and the translated strings we had did
use them. This meant that the detached HEAD line would appear after all
local refs and even after the remote refs if there were any.

AFAIK, it is sometimes not jarring to see the half-width parenthesis in
"full-width" text as in the CJK languages, for instance when there are
no characters preceding or following the parenthesized text fragment. By
removing the parenthesis from the localizable text, we can share strings
with wt-status.c and remove a cautionary comment to translators.

Remove the ( from the localizable portion of messages so the sorting
happens properly regardless of locale.

Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Helped-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Matthew DeVore <matvore@google.com>
---
 ref-filter.c | 32 ++++++++++++++++----------------
 wt-status.c  |  4 ++--
 wt-status.h  |  3 +++
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 8500671bc6..87aa6b4774 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1440,49 +1440,49 @@ static void fill_remote_ref_details(struct used_atom *atom, const char *refname,
 	} else
 		BUG("unhandled RR_* enum");
 }
 
 char *get_head_description(void)
 {
 	struct strbuf desc = STRBUF_INIT;
 	struct wt_status_state state;
 	memset(&state, 0, sizeof(state));
 	wt_status_get_state(the_repository, &state, 1);
+
+	/*
+	 * The ( character must be hard-coded and not part of a localizable
+	 * string, since the description is used as a sort key and compared
+	 * with ref names.
+	 */
+	strbuf_addch(&desc, '(');
 	if (state.rebase_in_progress ||
 	    state.rebase_interactive_in_progress) {
 		if (state.branch)
-			strbuf_addf(&desc, _("(no branch, rebasing %s)"),
+			strbuf_addf(&desc, _("no branch, rebasing %s"),
 				    state.branch);
 		else
-			strbuf_addf(&desc, _("(no branch, rebasing detached HEAD %s)"),
+			strbuf_addf(&desc, _("no branch, rebasing detached HEAD %s"),
 				    state.detached_from);
 	} else if (state.bisect_in_progress)
-		strbuf_addf(&desc, _("(no branch, bisect started on %s)"),
+		strbuf_addf(&desc, _("no branch, bisect started on %s"),
 			    state.branch);
 	else if (state.detached_from) {
 		if (state.detached_at)
-			/*
-			 * TRANSLATORS: make sure this matches "HEAD
-			 * detached at " in wt-status.c
-			 */
-			strbuf_addf(&desc, _("(HEAD detached at %s)"),
-				state.detached_from);
+			strbuf_addstr(&desc, HEAD_DETACHED_AT);
 		else
-			/*
-			 * TRANSLATORS: make sure this matches "HEAD
-			 * detached from " in wt-status.c
-			 */
-			strbuf_addf(&desc, _("(HEAD detached from %s)"),
-				state.detached_from);
+			strbuf_addstr(&desc, HEAD_DETACHED_FROM);
+		strbuf_addstr(&desc, state.detached_from);
 	}
 	else
-		strbuf_addstr(&desc, _("(no branch)"));
+		strbuf_addstr(&desc, _("no branch"));
+	strbuf_addch(&desc, ')');
+
 	free(state.branch);
 	free(state.onto);
 	free(state.detached_from);
 	return strbuf_detach(&desc, NULL);
 }
 
 static const char *get_symref(struct used_atom *atom, struct ref_array_item *ref)
 {
 	if (!ref->symref)
 		return xstrdup("");
diff --git a/wt-status.c b/wt-status.c
index 0bccef542f..c29e4bf091 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1669,23 +1669,23 @@ static void wt_longstatus_print(struct wt_status *s)
 			if (s->state.rebase_in_progress ||
 			    s->state.rebase_interactive_in_progress) {
 				if (s->state.rebase_interactive_in_progress)
 					on_what = _("interactive rebase in progress; onto ");
 				else
 					on_what = _("rebase in progress; onto ");
 				branch_name = s->state.onto;
 			} else if (s->state.detached_from) {
 				branch_name = s->state.detached_from;
 				if (s->state.detached_at)
-					on_what = _("HEAD detached at ");
+					on_what = HEAD_DETACHED_AT;
 				else
-					on_what = _("HEAD detached from ");
+					on_what = HEAD_DETACHED_FROM;
 			} else {
 				branch_name = "";
 				on_what = _("Not currently on any branch.");
 			}
 		} else
 			skip_prefix(branch_name, "refs/heads/", &branch_name);
 		status_printf(s, color(WT_STATUS_HEADER, s), "%s", "");
 		status_printf_more(s, branch_status_color, "%s", on_what);
 		status_printf_more(s, branch_color, "%s\n", branch_name);
 		if (!s->is_initial)
diff --git a/wt-status.h b/wt-status.h
index 64f1ddc9fd..b0cfdc8011 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -58,20 +58,23 @@ struct wt_status_change_data {
 enum wt_status_format {
 	STATUS_FORMAT_NONE = 0,
 	STATUS_FORMAT_LONG,
 	STATUS_FORMAT_SHORT,
 	STATUS_FORMAT_PORCELAIN,
 	STATUS_FORMAT_PORCELAIN_V2,
 
 	STATUS_FORMAT_UNSPECIFIED
 };
 
+#define HEAD_DETACHED_AT _("HEAD detached at ")
+#define HEAD_DETACHED_FROM _("HEAD detached from ")
+
 struct wt_status_state {
 	int merge_in_progress;
 	int am_in_progress;
 	int am_empty_patch;
 	int rebase_in_progress;
 	int rebase_interactive_in_progress;
 	int cherry_pick_in_progress;
 	int bisect_in_progress;
 	int revert_in_progress;
 	int detached_at;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 1/1] ref-filter: sort detached HEAD lines firstly
  2019-06-18 22:29   ` [PATCH v3 1/1] ref-filter: sort detached HEAD lines firstly Matthew DeVore
@ 2019-06-19 15:29     ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2019-06-19 15:29 UTC (permalink / raw)
  To: Matthew DeVore
  Cc: git, avarab, git, olyatelezhnaya, samuel.maftoul,
	Johannes.Schindelin, karthik.188, pclouds, sunshine,
	emilyshaffer, jrnieder

Matthew DeVore <matvore@google.com> writes:

> ... By
> removing the parenthesis from the localizable text, we can share strings
> with wt-status.c and remove a cautionary comment to translators.
...
> -			/*
> -			 * TRANSLATORS: make sure this matches "HEAD
> -			 * detached at " in wt-status.c
> -			 */
> -			strbuf_addf(&desc, _("(HEAD detached at %s)"),
> -				state.detached_from);
> +			strbuf_addstr(&desc, HEAD_DETACHED_AT);
>  		else
> -			/*
> -			 * TRANSLATORS: make sure this matches "HEAD
> -			 * detached from " in wt-status.c
> -			 */
> -			strbuf_addf(&desc, _("(HEAD detached from %s)"),
> -				state.detached_from);
> +			strbuf_addstr(&desc, HEAD_DETACHED_FROM);

Very nice ;-)

> +		strbuf_addstr(&desc, state.detached_from);
>  	}
>  	else
> -		strbuf_addstr(&desc, _("(no branch)"));
> +		strbuf_addstr(&desc, _("no branch"));
> +	strbuf_addch(&desc, ')');
> +
>  	free(state.branch);
>  	free(state.onto);
>  	free(state.detached_from);
>  	return strbuf_detach(&desc, NULL);
>  }

> diff --git a/wt-status.h b/wt-status.h
> index 64f1ddc9fd..b0cfdc8011 100644
> --- a/wt-status.h
> +++ b/wt-status.h
> @@ -58,20 +58,23 @@ struct wt_status_change_data {
> ...
>  
> +#define HEAD_DETACHED_AT _("HEAD detached at ")
> +#define HEAD_DETACHED_FROM _("HEAD detached from ")
> +
>  struct wt_status_state {

These too.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, back to index

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-06 21:38 [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
2019-06-09  8:17 ` [RFC PATCH] ref-filter: sort detached HEAD lines firstlyxy Johannes Schindelin
2019-06-09 16:39   ` Johannes Schindelin
2019-06-10 23:49   ` [RFC PATCH] ref-filter: sort detached HEAD lines firstly Matthew DeVore
2019-06-11  0:41     ` Jonathan Nieder
2019-06-11 16:48       ` Matthew DeVore
2019-06-11 19:53       ` Junio C Hamano
2019-06-11 18:28 ` [PATCH v2 0/1] Sort " Matthew DeVore
2019-06-11 18:28   ` [PATCH v2 1/1] ref-filter: sort " Matthew DeVore
2019-06-11 20:10     ` Junio C Hamano
2019-06-12 21:09       ` Junio C Hamano
2019-06-12 21:21         ` Matthew DeVore
2019-06-13  1:56         ` Matthew DeVore
2019-06-12 19:51     ` Johannes Schindelin
2019-06-13 16:58       ` Jeff King
2019-06-18 22:29 ` [PATCH v3 0/1] Sort detached heads line firstly Matthew DeVore
2019-06-18 22:29   ` [PATCH v3 1/1] ref-filter: sort detached HEAD lines firstly Matthew DeVore
2019-06-19 15:29     ` Junio C Hamano

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox