bug-gnulib@gnu.org mirror (unofficial)
 help / color / mirror / Atom feed
* localename: Notice setlocale() invocations on more platforms
@ 2024-02-15  0:35 Bruno Haible
  0 siblings, 0 replies; only message in thread
From: Bruno Haible @ 2024-02-15  0:35 UTC (permalink / raw)
  To: bug-gnulib

When a program calls setlocale() with some argument that is not reflected in
the environment variables, ideally the gl_locale_name function should return
that locale.

Currently this is enabled only for glibc systems. But it is actually possible
to do the same thing on some other platforms as well:
  musl, FreeBSD, NetBSD, Solaris, Haiku.

This patch does it. (This was on my TODO list for GNU gettext for many years.)


2024-02-14  Bruno Haible  <bruno@clisp.org>

	localename: Notice setlocale() invocations on more platforms.
	* lib/localename.c (HAVE_LOCALE_NULL): Define also on musl, FreeBSD,
	NetBSD, Solaris, Haiku.
	* modules/localename (Files): Add m4/musl.m4.
	(configure.ac): Invoke gl_MUSL_LIBC.
	* tests/test-localename.c (C_CANONICALIZED): New macro, for Haiku.
	(is_default): New function, for musl libc and Haiku.
	(test_locale_name, test_locale_name_posix): Use these definitions.
	* modules/localename-tests (Files): Add m4/musl.m4.
	(configure.ac): Invoke gl_MUSL_LIBC.

diff --git a/lib/localename.c b/lib/localename.c
index b4c78df8ae..3bfcdabf4f 100644
--- a/lib/localename.c
+++ b/lib/localename.c
@@ -3297,9 +3297,31 @@ gl_locale_name_thread (int category, _GL_UNUSED const char *categoryname)
    "Directs 'setlocale()' to query 'category' and return the current
     setting of 'local'."
    However it does not specify the exact format.  Neither do SUSV2 and
-   ISO C 99.  So we can use this feature only on selected systems (e.g.
-   those using GNU C Library).  */
-#if defined _LIBC || ((defined __GLIBC__ && __GLIBC__ >= 2) && !defined __UCLIBC__)
+   ISO C 99.  So we can use this feature only on selected systems, where
+   the return value has the XPG syntax
+     language[_territory][.codeset][@modifier]
+   or
+     C[.codeset]
+   namely
+     - glibc systems (except for aliases from /usr/share/locale/locale.alias,
+       that no one uses any more),
+     - musl libc,
+     - FreeBSD, NetBSD,
+     - Solaris,
+     - Haiku.
+   We cannot use it on
+     - macOS, Cygwin (because these systems have a facility for customizing the
+       default locale, and setlocale (category, NULL) ignores it and merely
+       returns "C" or "C.UTF-8"),
+     - OpenBSD (because on OpenBSD ≤ 6.1, LC_ALL does not set the LC_NUMERIC,
+       LC_TIME, LC_COLLATE, LC_MONETARY categories),
+     - AIX (because here the return value has the syntax
+         language[_script]_territory[.codeset]
+       e.g. zh_Hans_CN.UTF-8),
+     - native Windows (because it has locale names such as French_France.1252),
+     - Android (because it only supports the C and C.UTF-8 locales).
+ */
+#if defined _LIBC || ((defined __GLIBC__ && __GLIBC__ >= 2) && !defined __UCLIBC__) || MUSL_LIBC || defined __FreeBSD__ || defined __NetBSD__ || defined __sun || defined __HAIKU__
 # define HAVE_LOCALE_NULL
 #endif
 
@@ -3334,8 +3356,8 @@ gl_locale_name_posix (int category, _GL_UNUSED const char *categoryname)
     /* On other systems we ignore what setlocale reports and instead look at the
        environment variables directly.  This is necessary
          1. on systems which have a facility for customizing the default locale
-            (Mac OS X, native Windows, Cygwin) and where the system's setlocale()
-            function ignores this default locale (Mac OS X, Cygwin), in two cases:
+            (macOS, native Windows, Cygwin) and where the system's setlocale()
+            function ignores this default locale (macOS, Cygwin), in two cases:
             a. when the user missed to use the setlocale() override from libintl
                (for example by not including <libintl.h>),
             b. when setlocale supports only the "C" locale, such as on Cygwin
diff --git a/modules/localename b/modules/localename
index fc097829be..4c33624548 100644
--- a/modules/localename
+++ b/modules/localename
@@ -10,6 +10,7 @@ m4/localename.m4
 m4/intl-thread-locale.m4
 m4/intlmacosx.m4
 m4/lcmessage.m4
+m4/musl.m4
 
 Depends-on:
 extensions
@@ -26,6 +27,7 @@ thread-optim
 configure.ac:
 gl_LOCALENAME
 gl_LOCALE_MODULE_INDICATOR([localename])
+gl_MUSL_LIBC
 
 Makefile.am:
 lib_SOURCES += localename.c localename-table.c
diff --git a/modules/localename-tests b/modules/localename-tests
index f90d82da0d..0c24d5b4b6 100644
--- a/modules/localename-tests
+++ b/modules/localename-tests
@@ -1,6 +1,7 @@
 Files:
 tests/test-localename.c
 tests/macros.h
+m4/musl.m4
 
 Depends-on:
 locale
@@ -11,6 +12,7 @@ strdup
 
 configure.ac:
 gl_CHECK_FUNCS_ANDROID([newlocale], [[#include <locale.h>]])
+gl_MUSL_LIBC
 
 Makefile.am:
 TESTS += test-localename
diff --git a/tests/test-localename.c b/tests/test-localename.c
index fe31201361..03b70527d5 100644
--- a/tests/test-localename.c
+++ b/tests/test-localename.c
@@ -40,6 +40,26 @@
 # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
 #endif
 
+/* The name that setlocale(,NULL) returns for the "C" locale.  */
+#ifdef __HAIKU__
+# define C_CANONICALIZED "POSIX"
+#else
+# define C_CANONICALIZED "C"
+#endif
+
+static int
+is_default (const char *name)
+{
+  return strcmp (name, gl_locale_name_default ()) == 0
+         || (strcmp (name, C_CANONICALIZED) == 0
+             && strcmp (gl_locale_name_default (), "C") == 0)
+#if MUSL_LIBC
+         || (strcmp (name, "C.UTF-8") == 0
+             && strcmp (gl_locale_name_default (), "C") == 0)
+#endif
+         ;
+}
+
 #if HAVE_GOOD_USELOCALE
 
 static struct { int cat; int mask; const char *string; } const categories[] =
@@ -110,10 +130,8 @@ test_locale_name (void)
   unsetenv ("LC_TELEPHONE");
   ret = setlocale (LC_ALL, "");
   ASSERT (ret != NULL);
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
-                  gl_locale_name_default ()) == 0);
-  ASSERT (strcmp (gl_locale_name (LC_NUMERIC, "LC_NUMERIC"),
-                  gl_locale_name_default ()) == 0);
+  ASSERT (is_default (gl_locale_name (LC_MESSAGES, "LC_MESSAGES")));
+  ASSERT (is_default (gl_locale_name (LC_NUMERIC, "LC_NUMERIC")));
 
   /* Check that an empty environment variable is treated like an unset
      environment variable.  */
@@ -123,32 +141,28 @@ test_locale_name (void)
   unsetenv ("LC_MESSAGES");
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
-                  gl_locale_name_default ()) == 0);
+  ASSERT (is_default (gl_locale_name (LC_MESSAGES, "LC_MESSAGES")));
 
   unsetenv ("LC_ALL");
   setenv ("LC_CTYPE", "", 1);
   unsetenv ("LC_MESSAGES");
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
-                  gl_locale_name_default ()) == 0);
+  ASSERT (is_default (gl_locale_name (LC_MESSAGES, "LC_MESSAGES")));
 
   unsetenv ("LC_ALL");
   unsetenv ("LC_CTYPE");
   setenv ("LC_MESSAGES", "", 1);
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
-                  gl_locale_name_default ()) == 0);
+  ASSERT (is_default (gl_locale_name (LC_MESSAGES, "LC_MESSAGES")));
 
   unsetenv ("LC_ALL");
   unsetenv ("LC_CTYPE");
   unsetenv ("LC_MESSAGES");
   setenv ("LANG", "", 1);
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
-                  gl_locale_name_default ()) == 0);
+  ASSERT (is_default (gl_locale_name (LC_MESSAGES, "LC_MESSAGES")));
 
   /* Check that LC_ALL overrides the others, and LANG is overridden by the
      others.  */
@@ -158,21 +172,24 @@ test_locale_name (void)
   unsetenv ("LC_MESSAGES");
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), "C") == 0);
+  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
+                  C_CANONICALIZED) == 0);
 
   unsetenv ("LC_ALL");
   setenv ("LC_CTYPE", "C", 1);
   setenv ("LC_MESSAGES", "C", 1);
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), "C") == 0);
+  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
+                  C_CANONICALIZED) == 0);
 
   unsetenv ("LC_ALL");
   unsetenv ("LC_CTYPE");
   unsetenv ("LC_MESSAGES");
   setenv ("LANG", "C", 1);
   setlocale (LC_ALL, "");
-  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), "C") == 0);
+  ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"),
+                  C_CANONICALIZED) == 0);
 
   /* Check mixed situations.  */
 
@@ -204,7 +221,7 @@ test_locale_name (void)
   if (setlocale (LC_ALL, "") != NULL)
     {
       name = gl_locale_name (LC_CTYPE, "LC_CTYPE");
-      ASSERT (strcmp (name, gl_locale_name_default ()) == 0);
+      ASSERT (is_default (name));
       name = gl_locale_name (LC_MESSAGES, "LC_MESSAGES");
       ASSERT (strcmp (name, "fr_FR.UTF-8") == 0);
     }
@@ -552,9 +569,9 @@ test_locale_name_posix (void)
   ret = setlocale (LC_ALL, "");
   ASSERT (ret != NULL);
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+  ASSERT (name == NULL || is_default (name));
   name = gl_locale_name_posix (LC_NUMERIC, "LC_NUMERIC");
-  ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+  ASSERT (name == NULL || is_default (name));
 
   /* Check that an empty environment variable is treated like an unset
      environment variable.  */
@@ -565,7 +582,7 @@ test_locale_name_posix (void)
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+  ASSERT (name == NULL || is_default (name));
 
   unsetenv ("LC_ALL");
   setenv ("LC_CTYPE", "", 1);
@@ -573,7 +590,7 @@ test_locale_name_posix (void)
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+  ASSERT (name == NULL || is_default (name));
 
   unsetenv ("LC_ALL");
   unsetenv ("LC_CTYPE");
@@ -581,7 +598,7 @@ test_locale_name_posix (void)
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+  ASSERT (name == NULL || is_default (name));
 
   unsetenv ("LC_ALL");
   unsetenv ("LC_CTYPE");
@@ -589,7 +606,7 @@ test_locale_name_posix (void)
   setenv ("LANG", "", 1);
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+  ASSERT (name == NULL || is_default (name));
 
   /* Check that LC_ALL overrides the others, and LANG is overridden by the
      others.  */
@@ -600,7 +617,7 @@ test_locale_name_posix (void)
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (strcmp (name, "C") == 0);
+  ASSERT (strcmp (name, C_CANONICALIZED) == 0);
 
   unsetenv ("LC_ALL");
   setenv ("LC_CTYPE", "C", 1);
@@ -608,7 +625,7 @@ test_locale_name_posix (void)
   unsetenv ("LANG");
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (strcmp (name, "C") == 0);
+  ASSERT (strcmp (name, C_CANONICALIZED) == 0);
 
   unsetenv ("LC_ALL");
   unsetenv ("LC_CTYPE");
@@ -616,7 +633,7 @@ test_locale_name_posix (void)
   setenv ("LANG", "C", 1);
   setlocale (LC_ALL, "");
   name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-  ASSERT (strcmp (name, "C") == 0);
+  ASSERT (strcmp (name, C_CANONICALIZED) == 0);
 
   /* Check mixed situations.  */
 
@@ -643,7 +660,7 @@ test_locale_name_posix (void)
   if (setlocale (LC_ALL, "") != NULL)
     {
       name = gl_locale_name_posix (LC_CTYPE, "LC_CTYPE");
-      ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0);
+      ASSERT (name == NULL || is_default (name));
       name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
       ASSERT (strcmp (name, "fr_FR.UTF-8") == 0);
     }
@@ -661,7 +678,7 @@ test_locale_name_posix (void)
         setlocale (LC_ALL, "");
         uselocale (locale);
         name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES");
-        ASSERT (strcmp (name, "C") == 0);
+        ASSERT (strcmp (name, C_CANONICALIZED) == 0);
         uselocale (LC_GLOBAL_LOCALE);
         freelocale (locale);
       }





^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-02-15  0:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-15  0:35 localename: Notice setlocale() invocations on more platforms Bruno Haible

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).