bug-gnulib@gnu.org mirror (unofficial)
 help / color / mirror / Atom feed
* [PATCH 1/2] posix: User scratch_buffer on fnmatch
@ 2021-01-04 20:25 Adhemerval Zanella
  2021-01-04 20:25 ` [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation Adhemerval Zanella
  2021-01-04 20:35 ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Florian Weimer
  0 siblings, 2 replies; 16+ messages in thread
From: Adhemerval Zanella @ 2021-01-04 20:25 UTC (permalink / raw)
  To: libc-alpha, Paul Eggert; +Cc: bug-gnulib

It removes the alloca usage on the string convertion to wide characters
before calling the internal function.

Checked on x86_64-linux-gnu.
---
 posix/fnmatch.c | 152 +++++++++++++++++-------------------------------
 1 file changed, 53 insertions(+), 99 deletions(-)

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 5896812c96..ac254fc9ac 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -75,6 +75,7 @@ extern int fnmatch (const char *pattern, const char *string, int flags);
 
 #include <intprops.h>
 #include <flexmember.h>
+#include <scratch_buffer.h>
 
 #ifdef _LIBC
 typedef ptrdiff_t idx_t;
@@ -231,119 +232,72 @@ is_char_class (const wchar_t *wcs)
 
 #include "fnmatch_loop.c"
 
+static int
+fnmatch_convert_to_wide (const char *str, struct scratch_buffer *buf,
+                         size_t *n)
+{
+  mbstate_t ps;
+  memset (&ps, '\0', sizeof (ps));
+
+  size_t nw = buf->length / sizeof (wchar_t);
+  *n = strnlen (str, nw - 1);
+  if (__glibc_likely (*n < nw))
+    {
+      const char *p = str;
+      *n = mbsrtowcs (buf->data, &p, *n + 1, &ps);
+      if (__glibc_unlikely (*n == (size_t) -1))
+        /* Something wrong.
+           XXX Do we have to set 'errno' to something which mbsrtows hasn't
+           already done?  */
+        return -1;
+      if (p == NULL)
+        return 0;
+      memset (&ps, '\0', sizeof (ps));
+    }
+
+  *n = mbsrtowcs (NULL, &str, 0, &ps);
+  if (__glibc_unlikely (*n == (size_t) -1))
+    return -1;
+  if (!scratch_buffer_set_array_size (buf, *n + 1, sizeof (wchar_t)))
+    {
+      __set_errno (ENOMEM);
+      return -2;
+    }
+  assert (mbsinit (&ps));
+  mbsrtowcs (buf->data, &str, *n + 1, &ps);
+  return 0;
+}
 
 int
 fnmatch (const char *pattern, const char *string, int flags)
 {
   if (__glibc_unlikely (MB_CUR_MAX != 1))
     {
-      mbstate_t ps;
       size_t n;
-      const char *p;
-      wchar_t *wpattern_malloc = NULL;
-      wchar_t *wpattern;
-      wchar_t *wstring_malloc = NULL;
-      wchar_t *wstring;
-      size_t alloca_used = 0;
+      struct scratch_buffer wpattern;
+      scratch_buffer_init (&wpattern);
+      struct scratch_buffer wstring;
+      scratch_buffer_init (&wstring);
+      int r;
 
       /* Convert the strings into wide characters.  */
-      memset (&ps, '\0', sizeof (ps));
-      p = pattern;
-      n = strnlen (pattern, 1024);
-      if (__glibc_likely (n < 1024))
-        {
-          wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-                                                 alloca_used);
-          n = mbsrtowcs (wpattern, &p, n + 1, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            /* Something wrong.
-               XXX Do we have to set 'errno' to something which mbsrtows hasn't
-               already done?  */
-            return -1;
-          if (p)
-            {
-              memset (&ps, '\0', sizeof (ps));
-              goto prepare_wpattern;
-            }
-        }
-      else
-        {
-        prepare_wpattern:
-          n = mbsrtowcs (NULL, &pattern, 0, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            /* Something wrong.
-               XXX Do we have to set 'errno' to something which mbsrtows hasn't
-               already done?  */
-            return -1;
-          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-            {
-              __set_errno (ENOMEM);
-              return -2;
-            }
-          wpattern_malloc = wpattern
-            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-          assert (mbsinit (&ps));
-          if (wpattern == NULL)
-            return -2;
-          (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
-        }
-
-      assert (mbsinit (&ps));
-      n = strnlen (string, 1024);
-      p = string;
-      if (__glibc_likely (n < 1024))
-        {
-          wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-                                                alloca_used);
-          n = mbsrtowcs (wstring, &p, n + 1, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            {
-              /* Something wrong.
-                 XXX Do we have to set 'errno' to something which
-                 mbsrtows hasn't already done?  */
-            free_return:
-              free (wpattern_malloc);
-              return -1;
-            }
-          if (p)
-            {
-              memset (&ps, '\0', sizeof (ps));
-              goto prepare_wstring;
-            }
-        }
-      else
+      r = fnmatch_convert_to_wide (pattern, &wpattern, &n);
+      if (r != 0)
+        return r;
+      r = fnmatch_convert_to_wide (string, &wstring, &n);
+      if (r != 0)
         {
-        prepare_wstring:
-          n = mbsrtowcs (NULL, &string, 0, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            /* Something wrong.
-               XXX Do we have to set 'errno' to something which mbsrtows hasn't
-               already done?  */
-            goto free_return;
-          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-            {
-              free (wpattern_malloc);
-              __set_errno (ENOMEM);
-              return -2;
-            }
-
-          wstring_malloc = wstring
-            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-          if (wstring == NULL)
-            {
-              free (wpattern_malloc);
-              return -2;
-            }
-          assert (mbsinit (&ps));
-          (void) mbsrtowcs (wstring, &string, n + 1, &ps);
+          scratch_buffer_free (&wpattern);
+          return n;
         }
 
-      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
+      int res = internal_fnwmatch (wpattern.data, wstring.data,
+                                   (wchar_t *) wstring.data + n,
                                    flags & FNM_PERIOD, flags, NULL,
-                                   alloca_used);
+                                   false);
 
-      free (wstring_malloc);
-      free (wpattern_malloc);
+      scratch_buffer_free (&wstring);
+      scratch_buffer_free (&wpattern);
 
       return res;
     }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation
  2021-01-04 20:25 [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
@ 2021-01-04 20:25 ` Adhemerval Zanella
  2021-03-08 12:59   ` Florian Weimer
  2021-01-04 20:35 ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Florian Weimer
  1 sibling, 1 reply; 16+ messages in thread
From: Adhemerval Zanella @ 2021-01-04 20:25 UTC (permalink / raw)
  To: libc-alpha, Paul Eggert; +Cc: bug-gnulib

This patch replaces the internal fnmatch pattern list generation
to use a dynamic array.

Checked on x86_64-linux-gnu.
---
 posix/fnmatch.c      |  24 +-----
 posix/fnmatch_loop.c | 190 +++++++++++++++++++------------------------
 2 files changed, 87 insertions(+), 127 deletions(-)

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index ac254fc9ac..2a6186b594 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -31,9 +31,6 @@
 #include <ctype.h>
 #include <string.h>
 #include <stdlib.h>
-#if defined _LIBC || HAVE_ALLOCA
-# include <alloca.h>
-#endif
 #include <wchar.h>
 #include <wctype.h>
 #include <stddef.h>
@@ -87,22 +84,6 @@ typedef ptrdiff_t idx_t;
 #define NO_LEADING_PERIOD(flags) \
   ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
 
-#ifndef _LIBC
-# if HAVE_ALLOCA
-/* The OS usually guarantees only one guard page at the bottom of the stack,
-   and a page size can be as small as 4096 bytes.  So we cannot safely
-   allocate anything larger than 4096 bytes.  Also care for the possibility
-   of a few compiler-allocated temporary stack slots.  */
-#  define __libc_use_alloca(n) ((n) < 4032)
-# else
-/* Just use malloc.  */
-#  define __libc_use_alloca(n) false
-#  undef alloca
-#  define alloca(n) malloc (n)
-# endif
-# define alloca_account(size, avar) ((avar) += (size), alloca (size))
-#endif
-
 /* Provide support for user-defined character classes, based on the functions
    from ISO C 90 amendment 1.  */
 #ifdef CHARCLASS_NAME_MAX
@@ -293,8 +274,7 @@ fnmatch (const char *pattern, const char *string, int flags)
 
       int res = internal_fnwmatch (wpattern.data, wstring.data,
                                    (wchar_t *) wstring.data + n,
-                                   flags & FNM_PERIOD, flags, NULL,
-                                   false);
+                                   flags & FNM_PERIOD, flags, NULL);
 
       scratch_buffer_free (&wstring);
       scratch_buffer_free (&wpattern);
@@ -303,7 +283,7 @@ fnmatch (const char *pattern, const char *string, int flags)
     }
 
   return internal_fnmatch (pattern, string, string + strlen (string),
-                           flags & FNM_PERIOD, flags, NULL, 0);
+                           flags & FNM_PERIOD, flags, NULL);
 }
 
 #undef fnmatch
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 7f938af590..69f78f0fd8 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -30,15 +30,14 @@ struct STRUCT
    it matches, nonzero if not.  */
 static int FCT (const CHAR *pattern, const CHAR *string,
                 const CHAR *string_end, bool no_leading_period, int flags,
-                struct STRUCT *ends, size_t alloca_used);
+                struct STRUCT *ends);
 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
-                const CHAR *string_end, bool no_leading_period, int flags,
-                size_t alloca_used);
+                const CHAR *string_end, bool no_leading_period, int flags);
 static const CHAR *END (const CHAR *patternp);
 
 static int
 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
-     bool no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
+     bool no_leading_period, int flags, struct STRUCT *ends)
 {
   const CHAR *p = pattern, *n = string;
   UCHAR c;
@@ -62,8 +61,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
         case L_('?'):
           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
             {
-              int res = EXT (c, p, n, string_end, no_leading_period,
-                             flags, alloca_used);
+              int res = EXT (c, p, n, string_end, no_leading_period, flags);
               if (res != -1)
                 return res;
             }
@@ -92,8 +90,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
         case L_('*'):
           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
             {
-              int res = EXT (c, p, n, string_end, no_leading_period,
-                             flags, alloca_used);
+              int res = EXT (c, p, n, string_end, no_leading_period, flags);
               if (res != -1)
                 return res;
             }
@@ -182,7 +179,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 
                   for (--p; n < endp; ++n, no_leading_period = false)
                     if (FCT (p, n, string_end, no_leading_period, flags2,
-                             &end, alloca_used) == 0)
+                             &end) == 0)
                       goto found;
                 }
               else if (c == L_('/') && (flags & FNM_FILE_NAME))
@@ -191,7 +188,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                     ++n;
                   if (n < string_end && *n == L_('/')
                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
-                               NULL, alloca_used) == 0))
+                               NULL) == 0))
                     return 0;
                 }
               else
@@ -205,7 +202,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                   for (--p; n < endp; ++n, no_leading_period = false)
                     if (FOLD ((UCHAR) *n) == c
                         && (FCT (p, n, string_end, no_leading_period, flags2,
-                                 &end, alloca_used) == 0))
+                                 &end) == 0))
                       {
                       found:
                         if (end.pattern == NULL)
@@ -892,8 +889,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
         case L_('!'):
           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
             {
-              int res = EXT (c, p, n, string_end, no_leading_period, flags,
-                             alloca_used);
+              int res = EXT (c, p, n, string_end, no_leading_period, flags);
               if (res != -1)
                 return res;
             }
@@ -972,26 +968,37 @@ END (const CHAR *pattern)
   return p + 1;
 }
 
+#if WIDE_CHAR_VERSION
+# define PATTERN_PREFIX pattern_list
+#else
+# define PATTERN_PREFIX wpattern_list
+#endif
+
+#define PASTE(a,b)                 PASTE1(a,b)
+#define PASTE1(a,b)                a##b
+
+#define DYNARRAY_STRUCT            PATTERN_PREFIX
+#define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
+#define DYNARRAY_ELEMENT           CHAR *
+#define DYNARRAY_PREFIX            PASTE(PATTERN_PREFIX,_)
+#define DYNARRAY_INITIAL_SIZE      8
+#include <malloc/dynarray-skeleton.c>
 
 static int
 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
-     bool no_leading_period, int flags, size_t alloca_used)
+     bool no_leading_period, int flags)
 {
   const CHAR *startp;
   ptrdiff_t level;
-  struct patternlist
-  {
-    struct patternlist *next;
-    CHAR malloced;
-    CHAR str __flexarr;
-  } *list = NULL;
-  struct patternlist **lastp = &list;
+  struct PATTERN_PREFIX list;
   size_t pattern_len = STRLEN (pattern);
-  bool any_malloced = false;
+  size_t pattern_i = 0;
   const CHAR *p;
   const CHAR *rs;
   int retval = 0;
 
+  PASTE (PATTERN_PREFIX, _init) (&list);
+
   /* Parse the pattern.  Store the individual parts in the list.  */
   level = 0;
   for (startp = p = pattern + 1; level >= 0; ++p)
@@ -1027,74 +1034,48 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
               || *p == L_('!')) && p[1] == L_('('))
       /* Remember the nesting level.  */
       ++level;
-    else if (*p == L_(')'))
-      {
-        if (level-- == 0)
-          {
-            /* This means we found the end of the pattern.  */
-#define NEW_PATTERN \
-            struct patternlist *newp;                                         \
-            size_t plen = (opt == L_('?') || opt == L_('@')                   \
-                           ? pattern_len : (p - startp + 1UL));               \
-            idx_t slen = FLEXSIZEOF (struct patternlist, str, 0);             \
-            idx_t new_used = alloca_used + slen;                              \
-            idx_t plensize;                                                   \
-            if (INT_MULTIPLY_WRAPV (plen, sizeof (CHAR), &plensize)           \
-                || INT_ADD_WRAPV (new_used, plensize, &new_used))             \
-              {                                                               \
-                retval = -2;                                                  \
-                goto out;                                                     \
-              }                                                               \
-            slen += plensize;                                                 \
-            bool malloced = ! __libc_use_alloca (new_used);                   \
-            if (__glibc_unlikely (malloced))                                  \
-              {                                                               \
-                newp = malloc (slen);                                         \
-                if (newp == NULL)                                             \
-                  {                                                           \
-                    retval = -2;                                              \
-                    goto out;                                                 \
-                  }                                                           \
-                any_malloced = true;                                          \
-              }                                                               \
-            else                                                              \
-              newp = alloca_account (slen, alloca_used);                      \
-            newp->next = NULL;                                                \
-            newp->malloced = malloced;                                        \
-            *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');   \
-            *lastp = newp;                                                    \
-            lastp = &newp->next
-            NEW_PATTERN;
-          }
-      }
-    else if (*p == L_('|'))
+    else if (*p == L_(')') || *p == L_('|'))
       {
         if (level == 0)
           {
-            NEW_PATTERN;
-            startp = p + 1;
+            size_t slen = opt == L_('?') || opt == L_('@')
+			  ? pattern_len : p - startp + 1;
+            CHAR *newp = malloc (slen * sizeof (CHAR));
+            if (newp != NULL)
+              {
+                *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
+                PASTE (PATTERN_PREFIX,_add) (&list, newp);
+              }
+            if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
+              {
+                retval = -2;
+                goto out;
+              }
+
+            if (*p == L_('|'))
+              startp = p + 1;
           }
+        if (*p == L_(')'))
+	  level--;
       }
-  assert (list != NULL);
   assert (p[-1] == L_(')'));
-#undef NEW_PATTERN
 
   switch (opt)
     {
     case L_('*'):
-      if (FCT (p, string, string_end, no_leading_period, flags, NULL,
-               alloca_used) == 0)
+      if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
         goto success;
       FALLTHROUGH;
     case L_('+'):
-      do
+      for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
         {
           for (rs = string; rs <= string_end; ++rs)
             /* First match the prefix with the current pattern with the
                current pattern.  */
-            if (FCT (list->str, string, rs, no_leading_period,
+            if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
+                     rs, no_leading_period,
                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                     NULL, alloca_used) == 0
+                     NULL) == 0
                 /* This was successful.  Now match the rest with the rest
                    of the pattern.  */
                 && (FCT (p, rs, string_end,
@@ -1102,7 +1083,7 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                          ? no_leading_period
                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
                          flags & FNM_FILE_NAME
-                         ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
+                         ? flags : flags & ~FNM_PERIOD, NULL) == 0
                     /* This didn't work.  Try the whole pattern.  */
                     || (rs != string
                         && FCT (pattern - 1, rs, string_end,
@@ -1110,35 +1091,33 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                                 ? no_leading_period
                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
                                 flags & FNM_FILE_NAME
-                                ? flags : flags & ~FNM_PERIOD, NULL,
-                                alloca_used) == 0)))
+                                ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
               /* It worked.  Signal success.  */
               goto success;
         }
-      while ((list = list->next) != NULL);
 
       /* None of the patterns lead to a match.  */
       retval = FNM_NOMATCH;
       break;
 
     case L_('?'):
-      if (FCT (p, string, string_end, no_leading_period, flags, NULL,
-               alloca_used) == 0)
+      if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
         goto success;
       FALLTHROUGH;
     case L_('@'):
-      do
-        /* I cannot believe it but 'strcat' is actually acceptable
-           here.  Match the entire string with the prefix from the
-           pattern list and the rest of the pattern following the
-           pattern list.  */
-        if (FCT (STRCAT (list->str, p), string, string_end,
-                 no_leading_period,
-                 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                 NULL, alloca_used) == 0)
-          /* It worked.  Signal success.  */
-          goto success;
-      while ((list = list->next) != NULL);
+      for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
+        {
+          /* I cannot believe it but `strcat' is actually acceptable
+             here.  Match the entire string with the prefix from the
+             pattern list and the rest of the pattern following the
+             pattern list.  */
+          if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
+                   string, string_end, no_leading_period,
+                   flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                   NULL) == 0)
+            /* It worked.  Signal success.  */
+            goto success;
+        }
 
       /* None of the patterns lead to a match.  */
       retval = FNM_NOMATCH;
@@ -1147,22 +1126,27 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
     case L_('!'):
       for (rs = string; rs <= string_end; ++rs)
         {
-          struct patternlist *runp;
+	  size_t runp_i;
 
-          for (runp = list; runp != NULL; runp = runp->next)
-            if (FCT (runp->str, string, rs,  no_leading_period,
-                     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                     NULL, alloca_used) == 0)
+          for (runp_i = pattern_i;
+               runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
+               runp_i++)
+            {
+              if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
+                       no_leading_period,
+                       flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                       NULL) == 0)
               break;
+            }
 
           /* If none of the patterns matched see whether the rest does.  */
-          if (runp == NULL
+          if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
               && (FCT (p, rs, string_end,
                        rs == string
                        ? no_leading_period
                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                       NULL, alloca_used) == 0))
+                       NULL) == 0))
             /* This is successful.  */
             goto success;
         }
@@ -1180,18 +1164,14 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 
  success:
  out:
-  if (any_malloced)
-    while (list != NULL)
-      {
-        struct patternlist *old = list;
-        list = list->next;
-        if (old->malloced)
-          free (old);
-      }
+  PASTE (PATTERN_PREFIX, _free) (&list);
 
   return retval;
 }
 
+#undef PATTERN_PREFIX
+#undef PASTE
+#undef PASTE1
 
 #undef FOLD
 #undef CHAR
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-04 20:25 [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
  2021-01-04 20:25 ` [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation Adhemerval Zanella
@ 2021-01-04 20:35 ` Florian Weimer
  2021-01-05 13:07   ` Adhemerval Zanella
  1 sibling, 1 reply; 16+ messages in thread
From: Florian Weimer @ 2021-01-04 20:35 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha
  Cc: Paul Eggert, bug-gnulib, Adhemerval Zanella

* Adhemerval Zanella via Libc-alpha:

> It removes the alloca usage on the string convertion to wide characters
> before calling the internal function.

We have a downstream-only patch to fall back the single byte handling in
case of multibyte decoding failure.  Basically it's a quick hack to fix
this bug:

  <https://sourceware.org/bugzilla/show_bug.cgi?id=14185>

Is this something we should upstream?  Or rework fnmatch so that * is
matched properly against arbitrary bytes?

Thanks,
Florian
-- 
Red Hat GmbH, https://de.redhat.com/ , Registered seat: Grasbrunn,
Commercial register: Amtsgericht Muenchen, HRB 153243,
Managing Directors: Charles Cachera, Brian Klemm, Laurie Krebs, Michael O'Neill



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-04 20:35 ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Florian Weimer
@ 2021-01-05 13:07   ` Adhemerval Zanella
  2021-01-13 19:25     ` Paul Eggert
  0 siblings, 1 reply; 16+ messages in thread
From: Adhemerval Zanella @ 2021-01-05 13:07 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha, Paul Eggert; +Cc: bug-gnulib



On 04/01/2021 17:35, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> It removes the alloca usage on the string convertion to wide characters
>> before calling the internal function.
> 
> We have a downstream-only patch to fall back the single byte handling in
> case of multibyte decoding failure.  Basically it's a quick hack to fix
> this bug:
> 
>   <https://sourceware.org/bugzilla/show_bug.cgi?id=14185>
> 
> Is this something we should upstream?  Or rework fnmatch so that * is
> matched properly against arbitrary bytes?

It seems that gnulib has added the proposed fix with 
aed23714d60d91b2abc74be33635c32ddc5132b5 (done in 2005) and just recently
with a glibc merge with 67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7 (done in
2020 to sync back glibc changes) it has fallback to old semantic to return
-1 on in case of failure.  

I am not sure if gnulib was intentional or an overlook. But I am slight
worried about the issues raised by Rich in comment #4, where fnmatch would
match wrong patterns that happen to have invalid multibyte sequence.  Maybe
gnulib guys can gives us some insight here about the realword and if the
67306f600fe6a3 was intentional or not.

I have started to check the feasibility or making the Rich suggestions at
comment #7, to make fnmatch not go in the temporary buffer and call
mbrtowc.  It should be feasible, however it would require more extensive
change in the algorithm and some work to optimize for MB_CUR_MAX == 1
(where mbrtowc should not be necessary).








^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-05 13:07   ` Adhemerval Zanella
@ 2021-01-13 19:25     ` Paul Eggert
  2021-01-13 19:39       ` Florian Weimer
  2021-01-14 11:44       ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
  0 siblings, 2 replies; 16+ messages in thread
From: Paul Eggert @ 2021-01-13 19:25 UTC (permalink / raw)
  To: Adhemerval Zanella
  Cc: Florian Weimer, Adhemerval Zanella via Libc-alpha, bug-gnulib

On 1/5/21 5:07 AM, Adhemerval Zanella wrote:
> It seems that gnulib has added the proposed fix with
> aed23714d60d91b2abc74be33635c32ddc5132b5 (done in 2005) and just recently
> with a glibc merge with 67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7 (done in
> 2020 to sync back glibc changes) it has fallback to old semantic to return
> -1 on in case of failure.
> 
> I am not sure if gnulib was intentional or an overlook.

It was an oversight. I simply missed the issue when I did the merge.

> I have started to check the feasibility or making the Rich suggestions at
> comment #7,

That's the right way to go. I would not spend much time trying to fix 
the bugs in the existing code. We should rip out all the wide-char 
stuff, treat encoding errors as if they were just another "character" 
(that's what Emacs does), and stay in the multibyte world. We could 
steal some ideas from Emacs here.

By the way, how important is it to support awful encodings like 
shift-JIS that contain bytes that look like '\'? If we don't have to 
support these encodings any more, things get a bit easier. (Asking for a 
friend. :-)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-13 19:25     ` Paul Eggert
@ 2021-01-13 19:39       ` Florian Weimer
  2021-01-13 23:36         ` Bruno Haible
  2021-01-14 11:44       ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
  1 sibling, 1 reply; 16+ messages in thread
From: Florian Weimer @ 2021-01-13 19:39 UTC (permalink / raw)
  To: Paul Eggert
  Cc: Adhemerval Zanella via Libc-alpha, bug-gnulib, Adhemerval Zanella

* Paul Eggert:

> By the way, how important is it to support awful encodings like
> shift-JIS that contain bytes that look like '\'? If we don't have to 
> support these encodings any more, things get a bit easier. (Asking for
> a friend. :-)

There is a Shift-JIS variant which is ASCII-transparent (Windows-31J,
it's also specified by WhatWG/HTML5), so from a glibc point of view, it
would be just an ordinary charset like any other.

But feedback we have received is that the users who want Shift-JIS
really want the original thing.

We do not presently support either variant downstream, but one potential
way forward would be to turn Windows-31J into a fully supported glibc
charset with a corresponding ja_JP locale (which would imply downstream
support as well), and just hope that it displaces the original Shift-JIS
in the future.

Thanks,
Florian
-- 
Red Hat GmbH, https://de.redhat.com/ , Registered seat: Grasbrunn,
Commercial register: Amtsgericht Muenchen, HRB 153243,
Managing Directors: Charles Cachera, Brian Klemm, Laurie Krebs, Michael O'Neill



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-13 19:39       ` Florian Weimer
@ 2021-01-13 23:36         ` Bruno Haible
  2021-01-14 10:00           ` Florian Weimer
  0 siblings, 1 reply; 16+ messages in thread
From: Bruno Haible @ 2021-01-13 23:36 UTC (permalink / raw)
  To: bug-gnulib; +Cc: Florian Weimer, Paul Eggert, libc-alpha, Adhemerval Zanella

Paul Eggert asked:
> > By the way, how important is it to support awful encodings like
> > shift-JIS that contain bytes that look like '\'? If we don't have to 
> > support these encodings any more, things get a bit easier.

Here we are talking about locale encodings, and Shift_JIS (as well as
SHIFT_JISX0213) are not usable as a locale encoding in glibc. See e.g.
[1], [2].

That's the reason why no Shift_JIS locale is listed in
glibc/localedata/SUPPORTED. [3]

Florian Weimer wrote:
> There is a Shift-JIS variant which is ASCII-transparent (Windows-31J,
> it's also specified by WhatWG/HTML5), so from a glibc point of view, it
> would be just an ordinary charset like any other.
> 
> But feedback we have received is that the users who want Shift-JIS
> really want the original thing.
> 
> We do not presently support either variant downstream, but one potential
> way forward would be to turn Windows-31J into a fully supported glibc
> charset with a corresponding ja_JP locale (which would imply downstream
> support as well), and just hope that it displaces the original Shift-JIS
> in the future.

I don't think there's a real need for that. In the years 1995 ... 2005 there
was a lot of resistence against Unicode in Japan, because Unicode maps
several slightly differently looking glyph images to the same glyph/character
(even for Western encodings, for example the Polish accents look a bit different
than the French ones), and - at the time - Unicode did not have means to
disambiguate these, thus people complained about "characters are rendered
incorrectly if you use Unicode". This has been resolved for more than 10 years
already.

Bruno

[1] https://sourceware.org/bugzilla/show_bug.cgi?id=3140
[2] https://sourceware.org/legacy-ml/libc-alpha/2000-10/msg00311.html
[3] https://sourceware.org/git/?p=glibc.git;a=blob;f=localedata/SUPPORTED



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-13 23:36         ` Bruno Haible
@ 2021-01-14 10:00           ` Florian Weimer
  2021-03-06 17:18             ` Paul Eggert
  0 siblings, 1 reply; 16+ messages in thread
From: Florian Weimer @ 2021-01-14 10:00 UTC (permalink / raw)
  To: Bruno Haible; +Cc: libc-alpha, Paul Eggert, bug-gnulib, Adhemerval Zanella

* Bruno Haible:

> Paul Eggert asked:
>> > By the way, how important is it to support awful encodings like
>> > shift-JIS that contain bytes that look like '\'? If we don't have to 
>> > support these encodings any more, things get a bit easier.
>
> Here we are talking about locale encodings, and Shift_JIS (as well as
> SHIFT_JISX0213) are not usable as a locale encoding in glibc. See e.g.
> [1], [2].
>
> That's the reason why no Shift_JIS locale is listed in
> glibc/localedata/SUPPORTED. [3]

> [1] https://sourceware.org/bugzilla/show_bug.cgi?id=3140
> [2] https://sourceware.org/legacy-ml/libc-alpha/2000-10/msg00311.html
> [3] https://sourceware.org/git/?p=glibc.git;a=blob;f=localedata/SUPPORTED

We used to have a fully supported product based on the original
Shift-JIS.  It did not require glibc changes (we package both localedef
and the locale sources, so it's easy to build custom locales), but other
GNU components had to be patched.

> Florian Weimer wrote:
>> There is a Shift-JIS variant which is ASCII-transparent (Windows-31J,
>> it's also specified by WhatWG/HTML5), so from a glibc point of view, it
>> would be just an ordinary charset like any other.
>> 
>> But feedback we have received is that the users who want Shift-JIS
>> really want the original thing.
>> 
>> We do not presently support either variant downstream, but one potential
>> way forward would be to turn Windows-31J into a fully supported glibc
>> charset with a corresponding ja_JP locale (which would imply downstream
>> support as well), and just hope that it displaces the original Shift-JIS
>> in the future.
>
> I don't think there's a real need for that. In the years 1995 ... 2005
> there was a lot of resistence against Unicode in Japan, because
> Unicode maps several slightly differently looking glyph images to the
> same glyph/character (even for Western encodings, for example the
> Polish accents look a bit different than the French ones), and - at
> the time - Unicode did not have means to disambiguate these, thus
> people complained about "characters are rendered incorrectly if you
> use Unicode". This has been resolved for more than 10 years already.

We saw commercial demand for Shift-JIS much later than that.  I think an
official Windows-31J-based ja_JP would still be welcomed at this point.

A Windows-31J locale could be added to localedata/SUPPORTED.  We have
not done that yet because someone wanted to look into alignment between
Windows, HTML/WhatWG and what we currently have in the source tree, but
that hasn't happened yet, unfortunately.

Thanks,
Florian
-- 
Red Hat GmbH, https://de.redhat.com/ , Registered seat: Grasbrunn,
Commercial register: Amtsgericht Muenchen, HRB 153243,
Managing Directors: Charles Cachera, Brian Klemm, Laurie Krebs, Michael O'Neill



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-13 19:25     ` Paul Eggert
  2021-01-13 19:39       ` Florian Weimer
@ 2021-01-14 11:44       ` Adhemerval Zanella
  2021-01-15  6:56         ` Paul Eggert
  1 sibling, 1 reply; 16+ messages in thread
From: Adhemerval Zanella @ 2021-01-14 11:44 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Florian Weimer, Adhemerval Zanella via Libc-alpha, bug-gnulib



On 13/01/2021 16:25, Paul Eggert wrote:
> On 1/5/21 5:07 AM, Adhemerval Zanella wrote:
>> It seems that gnulib has added the proposed fix with
>> aed23714d60d91b2abc74be33635c32ddc5132b5 (done in 2005) and just recently
>> with a glibc merge with 67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7 (done in
>> 2020 to sync back glibc changes) it has fallback to old semantic to return
>> -1 on in case of failure.
>>
>> I am not sure if gnulib was intentional or an overlook.
> 
> It was an oversight. I simply missed the issue when I did the merge.
> 
>> I have started to check the feasibility or making the Rich suggestions at
>> comment #7,
> 
> That's the right way to go. I would not spend much time trying to fix the bugs in the existing code. We should rip out all the wide-char stuff, treat encoding errors as if they were just another "character" (that's what Emacs does), and stay in the multibyte world. We could steal some ideas from Emacs here.

I think we still should fix BZ#14185 with the suggested with the strategy of
falling back to non wide mode in case of encoding error.  

The full fix will take some time, since it is pretty much a rewrite of the
fnmatch_loop.c.

> 
> By the way, how important is it to support awful encodings like shift-JIS that contain bytes that look like '\'? If we don't have to support these encodings any more, things get a bit easier. (Asking for a friend. :-)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-14 11:44       ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
@ 2021-01-15  6:56         ` Paul Eggert
  0 siblings, 0 replies; 16+ messages in thread
From: Paul Eggert @ 2021-01-15  6:56 UTC (permalink / raw)
  To: Adhemerval Zanella
  Cc: Florian Weimer, Adhemerval Zanella via Libc-alpha, bug-gnulib

On 1/14/21 3:44 AM, Adhemerval Zanella wrote:
> I think we still should fix BZ#14185 with the suggested with the strategy of
> falling back to non wide mode in case of encoding error.

For glibc 2.33 that's likely the best we can do.

> The full fix will take some time, since it is pretty much a rewrite of the
> fnmatch_loop.c.

Yes. Should be doable for 2.34 though, I'd hope.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation
  2021-02-02 13:08 [PATCH 1/2] posix: Falling back to non wide mode in case of encoding error [BZ #14185] Adhemerval Zanella
@ 2021-02-02 13:08 ` Adhemerval Zanella
  0 siblings, 0 replies; 16+ messages in thread
From: Adhemerval Zanella @ 2021-02-02 13:08 UTC (permalink / raw)
  To: libc-alpha; +Cc: Paul Eggert, bug-gnulib

This patch replaces the internal fnmatch pattern list generation
to use a dynamic array.

Checked on x86_64-linux-gnu.
---
 posix/fnmatch.c      |  24 +-----
 posix/fnmatch_loop.c | 190 +++++++++++++++++++------------------------
 2 files changed, 87 insertions(+), 127 deletions(-)

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index a66c9196c7..51080ab833 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -31,9 +31,6 @@
 #include <ctype.h>
 #include <string.h>
 #include <stdlib.h>
-#if defined _LIBC || HAVE_ALLOCA
-# include <alloca.h>
-#endif
 #include <wchar.h>
 #include <wctype.h>
 #include <stddef.h>
@@ -87,22 +84,6 @@ typedef ptrdiff_t idx_t;
 #define NO_LEADING_PERIOD(flags) \
   ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
 
-#ifndef _LIBC
-# if HAVE_ALLOCA
-/* The OS usually guarantees only one guard page at the bottom of the stack,
-   and a page size can be as small as 4096 bytes.  So we cannot safely
-   allocate anything larger than 4096 bytes.  Also care for the possibility
-   of a few compiler-allocated temporary stack slots.  */
-#  define __libc_use_alloca(n) ((n) < 4032)
-# else
-/* Just use malloc.  */
-#  define __libc_use_alloca(n) false
-#  undef alloca
-#  define alloca(n) malloc (n)
-# endif
-# define alloca_account(size, avar) ((avar) += (size), alloca (size))
-#endif
-
 /* Provide support for user-defined character classes, based on the functions
    from ISO C 90 amendment 1.  */
 #ifdef CHARCLASS_NAME_MAX
@@ -289,8 +270,7 @@ fnmatch (const char *pattern, const char *string, int flags)
           if (r == 0)
             r = internal_fnwmatch (wpattern.data, wstring.data,
                                    (wchar_t *) wstring.data + n,
-                                   flags & FNM_PERIOD, flags, NULL,
-                                   false);
+                                   flags & FNM_PERIOD, flags, NULL);
         }
 
       scratch_buffer_free (&wstring);
@@ -301,7 +281,7 @@ fnmatch (const char *pattern, const char *string, int flags)
     }
 
   return internal_fnmatch (pattern, string, string + strlen (string),
-                           flags & FNM_PERIOD, flags, NULL, 0);
+                           flags & FNM_PERIOD, flags, NULL);
 }
 
 #undef fnmatch
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 7f938af590..69f78f0fd8 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -30,15 +30,14 @@ struct STRUCT
    it matches, nonzero if not.  */
 static int FCT (const CHAR *pattern, const CHAR *string,
                 const CHAR *string_end, bool no_leading_period, int flags,
-                struct STRUCT *ends, size_t alloca_used);
+                struct STRUCT *ends);
 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
-                const CHAR *string_end, bool no_leading_period, int flags,
-                size_t alloca_used);
+                const CHAR *string_end, bool no_leading_period, int flags);
 static const CHAR *END (const CHAR *patternp);
 
 static int
 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
-     bool no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
+     bool no_leading_period, int flags, struct STRUCT *ends)
 {
   const CHAR *p = pattern, *n = string;
   UCHAR c;
@@ -62,8 +61,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
         case L_('?'):
           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
             {
-              int res = EXT (c, p, n, string_end, no_leading_period,
-                             flags, alloca_used);
+              int res = EXT (c, p, n, string_end, no_leading_period, flags);
               if (res != -1)
                 return res;
             }
@@ -92,8 +90,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
         case L_('*'):
           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
             {
-              int res = EXT (c, p, n, string_end, no_leading_period,
-                             flags, alloca_used);
+              int res = EXT (c, p, n, string_end, no_leading_period, flags);
               if (res != -1)
                 return res;
             }
@@ -182,7 +179,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 
                   for (--p; n < endp; ++n, no_leading_period = false)
                     if (FCT (p, n, string_end, no_leading_period, flags2,
-                             &end, alloca_used) == 0)
+                             &end) == 0)
                       goto found;
                 }
               else if (c == L_('/') && (flags & FNM_FILE_NAME))
@@ -191,7 +188,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                     ++n;
                   if (n < string_end && *n == L_('/')
                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
-                               NULL, alloca_used) == 0))
+                               NULL) == 0))
                     return 0;
                 }
               else
@@ -205,7 +202,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                   for (--p; n < endp; ++n, no_leading_period = false)
                     if (FOLD ((UCHAR) *n) == c
                         && (FCT (p, n, string_end, no_leading_period, flags2,
-                                 &end, alloca_used) == 0))
+                                 &end) == 0))
                       {
                       found:
                         if (end.pattern == NULL)
@@ -892,8 +889,7 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
         case L_('!'):
           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
             {
-              int res = EXT (c, p, n, string_end, no_leading_period, flags,
-                             alloca_used);
+              int res = EXT (c, p, n, string_end, no_leading_period, flags);
               if (res != -1)
                 return res;
             }
@@ -972,26 +968,37 @@ END (const CHAR *pattern)
   return p + 1;
 }
 
+#if WIDE_CHAR_VERSION
+# define PATTERN_PREFIX pattern_list
+#else
+# define PATTERN_PREFIX wpattern_list
+#endif
+
+#define PASTE(a,b)                 PASTE1(a,b)
+#define PASTE1(a,b)                a##b
+
+#define DYNARRAY_STRUCT            PATTERN_PREFIX
+#define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
+#define DYNARRAY_ELEMENT           CHAR *
+#define DYNARRAY_PREFIX            PASTE(PATTERN_PREFIX,_)
+#define DYNARRAY_INITIAL_SIZE      8
+#include <malloc/dynarray-skeleton.c>
 
 static int
 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
-     bool no_leading_period, int flags, size_t alloca_used)
+     bool no_leading_period, int flags)
 {
   const CHAR *startp;
   ptrdiff_t level;
-  struct patternlist
-  {
-    struct patternlist *next;
-    CHAR malloced;
-    CHAR str __flexarr;
-  } *list = NULL;
-  struct patternlist **lastp = &list;
+  struct PATTERN_PREFIX list;
   size_t pattern_len = STRLEN (pattern);
-  bool any_malloced = false;
+  size_t pattern_i = 0;
   const CHAR *p;
   const CHAR *rs;
   int retval = 0;
 
+  PASTE (PATTERN_PREFIX, _init) (&list);
+
   /* Parse the pattern.  Store the individual parts in the list.  */
   level = 0;
   for (startp = p = pattern + 1; level >= 0; ++p)
@@ -1027,74 +1034,48 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
               || *p == L_('!')) && p[1] == L_('('))
       /* Remember the nesting level.  */
       ++level;
-    else if (*p == L_(')'))
-      {
-        if (level-- == 0)
-          {
-            /* This means we found the end of the pattern.  */
-#define NEW_PATTERN \
-            struct patternlist *newp;                                         \
-            size_t plen = (opt == L_('?') || opt == L_('@')                   \
-                           ? pattern_len : (p - startp + 1UL));               \
-            idx_t slen = FLEXSIZEOF (struct patternlist, str, 0);             \
-            idx_t new_used = alloca_used + slen;                              \
-            idx_t plensize;                                                   \
-            if (INT_MULTIPLY_WRAPV (plen, sizeof (CHAR), &plensize)           \
-                || INT_ADD_WRAPV (new_used, plensize, &new_used))             \
-              {                                                               \
-                retval = -2;                                                  \
-                goto out;                                                     \
-              }                                                               \
-            slen += plensize;                                                 \
-            bool malloced = ! __libc_use_alloca (new_used);                   \
-            if (__glibc_unlikely (malloced))                                  \
-              {                                                               \
-                newp = malloc (slen);                                         \
-                if (newp == NULL)                                             \
-                  {                                                           \
-                    retval = -2;                                              \
-                    goto out;                                                 \
-                  }                                                           \
-                any_malloced = true;                                          \
-              }                                                               \
-            else                                                              \
-              newp = alloca_account (slen, alloca_used);                      \
-            newp->next = NULL;                                                \
-            newp->malloced = malloced;                                        \
-            *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');   \
-            *lastp = newp;                                                    \
-            lastp = &newp->next
-            NEW_PATTERN;
-          }
-      }
-    else if (*p == L_('|'))
+    else if (*p == L_(')') || *p == L_('|'))
       {
         if (level == 0)
           {
-            NEW_PATTERN;
-            startp = p + 1;
+            size_t slen = opt == L_('?') || opt == L_('@')
+			  ? pattern_len : p - startp + 1;
+            CHAR *newp = malloc (slen * sizeof (CHAR));
+            if (newp != NULL)
+              {
+                *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
+                PASTE (PATTERN_PREFIX,_add) (&list, newp);
+              }
+            if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
+              {
+                retval = -2;
+                goto out;
+              }
+
+            if (*p == L_('|'))
+              startp = p + 1;
           }
+        if (*p == L_(')'))
+	  level--;
       }
-  assert (list != NULL);
   assert (p[-1] == L_(')'));
-#undef NEW_PATTERN
 
   switch (opt)
     {
     case L_('*'):
-      if (FCT (p, string, string_end, no_leading_period, flags, NULL,
-               alloca_used) == 0)
+      if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
         goto success;
       FALLTHROUGH;
     case L_('+'):
-      do
+      for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
         {
           for (rs = string; rs <= string_end; ++rs)
             /* First match the prefix with the current pattern with the
                current pattern.  */
-            if (FCT (list->str, string, rs, no_leading_period,
+            if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
+                     rs, no_leading_period,
                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                     NULL, alloca_used) == 0
+                     NULL) == 0
                 /* This was successful.  Now match the rest with the rest
                    of the pattern.  */
                 && (FCT (p, rs, string_end,
@@ -1102,7 +1083,7 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                          ? no_leading_period
                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
                          flags & FNM_FILE_NAME
-                         ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
+                         ? flags : flags & ~FNM_PERIOD, NULL) == 0
                     /* This didn't work.  Try the whole pattern.  */
                     || (rs != string
                         && FCT (pattern - 1, rs, string_end,
@@ -1110,35 +1091,33 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
                                 ? no_leading_period
                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
                                 flags & FNM_FILE_NAME
-                                ? flags : flags & ~FNM_PERIOD, NULL,
-                                alloca_used) == 0)))
+                                ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
               /* It worked.  Signal success.  */
               goto success;
         }
-      while ((list = list->next) != NULL);
 
       /* None of the patterns lead to a match.  */
       retval = FNM_NOMATCH;
       break;
 
     case L_('?'):
-      if (FCT (p, string, string_end, no_leading_period, flags, NULL,
-               alloca_used) == 0)
+      if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
         goto success;
       FALLTHROUGH;
     case L_('@'):
-      do
-        /* I cannot believe it but 'strcat' is actually acceptable
-           here.  Match the entire string with the prefix from the
-           pattern list and the rest of the pattern following the
-           pattern list.  */
-        if (FCT (STRCAT (list->str, p), string, string_end,
-                 no_leading_period,
-                 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                 NULL, alloca_used) == 0)
-          /* It worked.  Signal success.  */
-          goto success;
-      while ((list = list->next) != NULL);
+      for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
+        {
+          /* I cannot believe it but `strcat' is actually acceptable
+             here.  Match the entire string with the prefix from the
+             pattern list and the rest of the pattern following the
+             pattern list.  */
+          if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
+                   string, string_end, no_leading_period,
+                   flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                   NULL) == 0)
+            /* It worked.  Signal success.  */
+            goto success;
+        }
 
       /* None of the patterns lead to a match.  */
       retval = FNM_NOMATCH;
@@ -1147,22 +1126,27 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
     case L_('!'):
       for (rs = string; rs <= string_end; ++rs)
         {
-          struct patternlist *runp;
+	  size_t runp_i;
 
-          for (runp = list; runp != NULL; runp = runp->next)
-            if (FCT (runp->str, string, rs,  no_leading_period,
-                     flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                     NULL, alloca_used) == 0)
+          for (runp_i = pattern_i;
+               runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
+               runp_i++)
+            {
+              if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
+                       no_leading_period,
+                       flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                       NULL) == 0)
               break;
+            }
 
           /* If none of the patterns matched see whether the rest does.  */
-          if (runp == NULL
+          if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
               && (FCT (p, rs, string_end,
                        rs == string
                        ? no_leading_period
                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
-                       NULL, alloca_used) == 0))
+                       NULL) == 0))
             /* This is successful.  */
             goto success;
         }
@@ -1180,18 +1164,14 @@ EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 
  success:
  out:
-  if (any_malloced)
-    while (list != NULL)
-      {
-        struct patternlist *old = list;
-        list = list->next;
-        if (old->malloced)
-          free (old);
-      }
+  PASTE (PATTERN_PREFIX, _free) (&list);
 
   return retval;
 }
 
+#undef PATTERN_PREFIX
+#undef PASTE
+#undef PASTE1
 
 #undef FOLD
 #undef CHAR
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] posix: User scratch_buffer on fnmatch
  2021-01-14 10:00           ` Florian Weimer
@ 2021-03-06 17:18             ` Paul Eggert
  2021-03-06 20:17               ` dealing with non-ASCII-safe encodings Bruno Haible
  0 siblings, 1 reply; 16+ messages in thread
From: Paul Eggert @ 2021-03-06 17:18 UTC (permalink / raw)
  To: Florian Weimer, Bruno Haible; +Cc: libc-alpha, bug-gnulib, Adhemerval Zanella

On 1/14/21 2:00 AM, Florian Weimer wrote:
> We saw commercial demand for Shift-JIS much later than that.

Yes, I see Red Hat has a product enhancement for that, for coreutils, 
dated 2019.[1]

Shift-JIS is still very much alive and kicking in Japan in its own 
circles. Even DBCS EBCDIC is still used among old mainframe 
applications.[2] I very much doubt whether we need to worry about DBCS 
EBCDIC in GNU software, though.

Shift-JIS is a closer call as it's used a lot more than DBCS EBCDIC. 
However, my worry is that good support for non-ASCII-safe encodings like 
Shift-JIS is hard to do, and that any such support we'd add to 
Gnulib/coreutils/etc. would not only increase maintenance costs and 
reduce runtime performance, but also would be buggy in many oddball 
corner cases (and this would have security implications).

[1] https://access.redhat.com/errata/RHEA-2019:0008

[2] 
https://www.ibm.com/support/knowledgecenter/en/SS4PJT_6.0.0/cd_zos/cdzos_admin/ZOS_Overview_of_DBCS.html


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: dealing with non-ASCII-safe encodings
  2021-03-06 17:18             ` Paul Eggert
@ 2021-03-06 20:17               ` Bruno Haible
  0 siblings, 0 replies; 16+ messages in thread
From: Bruno Haible @ 2021-03-06 20:17 UTC (permalink / raw)
  To: Paul Eggert; +Cc: Florian Weimer, libc-alpha, bug-gnulib, Adhemerval Zanella

Paul Eggert wrote:
> However, my worry is that good support for non-ASCII-safe encodings like 
> Shift-JIS is hard to do, and that any such support we'd add to 
> Gnulib/coreutils/etc. would not only increase maintenance costs and 
> reduce runtime performance

Shift_JIS is not the only non-ASCII-safe encoding; GB18030, BIG5, BIG5-HKSCS,
and GBK are as well, and among these GB18030 is used as locale encoding
in China. Therefore it is important for programs to support these locale
encodings.

Gnulib has the support for it:

  - It has replacement functions that operate correctly with these locale
    encodings:
      strstr, c_strstr -> mbsstr
      strchr -> mbschr
      strrchr -> mbsrchr
      strspn -> mbsspn
      strcspn -> mbscspn
      strpbrk -> mbspbrk
      strsep -> mbssep
      strtok_r -> mbstok_r

  - It has warnings (through _GL_WARN_ON_USE) for uses of the functions
    that are not OK for non-ASCII-safe encodings.

  - It has modules mbchar, mbiter, mbfile for iterating through the
    multibyte characters of a string or file, that work for all locale
    encodings.

Yes, it does reduce the performance to use these safer functions.
I have shown in the past, through coreutils patches, how to accommodate
both a "fast path" and a "safe path" in the same binary.

Bruno



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation
  2021-01-04 20:25 ` [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation Adhemerval Zanella
@ 2021-03-08 12:59   ` Florian Weimer
  2021-10-20 15:12     ` Adhemerval Zanella
  0 siblings, 1 reply; 16+ messages in thread
From: Florian Weimer @ 2021-03-08 12:59 UTC (permalink / raw)
  To: Adhemerval Zanella via Libc-alpha
  Cc: Paul Eggert, bug-gnulib, Adhemerval Zanella

* Adhemerval Zanella via Libc-alpha:

> -    else if (*p == L_('|'))
> +    else if (*p == L_(')') || *p == L_('|'))
>        {
>          if (level == 0)
>            {
> -            NEW_PATTERN;
> -            startp = p + 1;
> +            size_t slen = opt == L_('?') || opt == L_('@')
> +			  ? pattern_len : p - startp + 1;
> +            CHAR *newp = malloc (slen * sizeof (CHAR));
> +            if (newp != NULL)
> +              {
> +                *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
> +                PASTE (PATTERN_PREFIX,_add) (&list, newp);
> +              }
> +            if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
> +              {
> +                retval = -2;
> +                goto out;
> +              }
> +
> +            if (*p == L_('|'))
> +              startp = p + 1;
>            }

slen seems to be the wrong variable name.  But I don't know wh the
original code computes plen conditionally and then uses p - startp
unconditionally.  That seems wrong.  The discrepancy goes back to
821a6bb4360.  Do you see a case where the difference matters?

The == 0 checks for the recursive FCT calls are wrong because they treat
match failure the same as OOM and other errors (the -2 return value),
but that also is a pre-existing issue.

The conversation itself appears to be faithful.

Thanks,
Florian



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation
  2021-03-08 12:59   ` Florian Weimer
@ 2021-10-20 15:12     ` Adhemerval Zanella
  2021-10-21  9:54       ` Florian Weimer
  0 siblings, 1 reply; 16+ messages in thread
From: Adhemerval Zanella @ 2021-10-20 15:12 UTC (permalink / raw)
  To: Florian Weimer, Adhemerval Zanella via Libc-alpha; +Cc: Paul Eggert, bug-gnulib



On 08/03/2021 09:59, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> -    else if (*p == L_('|'))
>> +    else if (*p == L_(')') || *p == L_('|'))
>>        {
>>          if (level == 0)
>>            {
>> -            NEW_PATTERN;
>> -            startp = p + 1;
>> +            size_t slen = opt == L_('?') || opt == L_('@')
>> +			  ? pattern_len : p - startp + 1;
>> +            CHAR *newp = malloc (slen * sizeof (CHAR));
>> +            if (newp != NULL)
>> +              {
>> +                *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
>> +                PASTE (PATTERN_PREFIX,_add) (&list, newp);
>> +              }
>> +            if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
>> +              {
>> +                retval = -2;
>> +                goto out;
>> +              }
>> +
>> +            if (*p == L_('|'))
>> +              startp = p + 1;
>>            }
> 
> slen seems to be the wrong variable name.  But I don't know wh the
> original code computes plen conditionally and then uses p - startp
> unconditionally.  That seems wrong.  The discrepancy goes back to
> 821a6bb4360.  Do you see a case where the difference matters?
> 
> The == 0 checks for the recursive FCT calls are wrong because they treat
> match failure the same as OOM and other errors (the -2 return value),
> but that also is a pre-existing issue.
> 
> The conversation itself appears to be faithful.

Hi Florian,

I noted this patch [1] is marked accepted, was you the one that
accepted it? In any case, are you still ok with the change?


[1] https://patchwork.sourceware.org/project/glibc/patch/20210202130804.1920933-2-adhemerval.zanella@linaro.org/


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation
  2021-10-20 15:12     ` Adhemerval Zanella
@ 2021-10-21  9:54       ` Florian Weimer
  0 siblings, 0 replies; 16+ messages in thread
From: Florian Weimer @ 2021-10-21  9:54 UTC (permalink / raw)
  To: Adhemerval Zanella
  Cc: bug-gnulib, Paul Eggert, Adhemerval Zanella via Libc-alpha

* Adhemerval Zanella:

> On 08/03/2021 09:59, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>> 
>>> -    else if (*p == L_('|'))
>>> +    else if (*p == L_(')') || *p == L_('|'))
>>>        {
>>>          if (level == 0)
>>>            {
>>> -            NEW_PATTERN;
>>> -            startp = p + 1;
>>> +            size_t slen = opt == L_('?') || opt == L_('@')
>>> +			  ? pattern_len : p - startp + 1;
>>> +            CHAR *newp = malloc (slen * sizeof (CHAR));
>>> +            if (newp != NULL)
>>> +              {
>>> +                *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
>>> +                PASTE (PATTERN_PREFIX,_add) (&list, newp);
>>> +              }
>>> +            if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
>>> +              {
>>> +                retval = -2;
>>> +                goto out;
>>> +              }
>>> +
>>> +            if (*p == L_('|'))
>>> +              startp = p + 1;
>>>            }
>> 
>> slen seems to be the wrong variable name.  But I don't know wh the
>> original code computes plen conditionally and then uses p - startp
>> unconditionally.  That seems wrong.  The discrepancy goes back to
>> 821a6bb4360.  Do you see a case where the difference matters?
>> 
>> The == 0 checks for the recursive FCT calls are wrong because they treat
>> match failure the same as OOM and other errors (the -2 return value),
>> but that also is a pre-existing issue.
>> 
>> The conversation itself appears to be faithful.
>
> Hi Florian,
>
> I noted this patch [1] is marked accepted, was you the one that
> accepted it? In any case, are you still ok with the change?
>
>
> [1] https://patchwork.sourceware.org/project/glibc/patch/20210202130804.1920933-2-adhemerval.zanella@linaro.org/

I think all the issues I identified are pre-existing, and as I said, the
conversion to remove alloca appears to be correct.

Thanks,
Florian



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-10-21  9:56 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-04 20:25 [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
2021-01-04 20:25 ` [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation Adhemerval Zanella
2021-03-08 12:59   ` Florian Weimer
2021-10-20 15:12     ` Adhemerval Zanella
2021-10-21  9:54       ` Florian Weimer
2021-01-04 20:35 ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Florian Weimer
2021-01-05 13:07   ` Adhemerval Zanella
2021-01-13 19:25     ` Paul Eggert
2021-01-13 19:39       ` Florian Weimer
2021-01-13 23:36         ` Bruno Haible
2021-01-14 10:00           ` Florian Weimer
2021-03-06 17:18             ` Paul Eggert
2021-03-06 20:17               ` dealing with non-ASCII-safe encodings Bruno Haible
2021-01-14 11:44       ` [PATCH 1/2] posix: User scratch_buffer on fnmatch Adhemerval Zanella
2021-01-15  6:56         ` Paul Eggert
  -- strict thread matches above, loose matches on Subject: below --
2021-02-02 13:08 [PATCH 1/2] posix: Falling back to non wide mode in case of encoding error [BZ #14185] Adhemerval Zanella
2021-02-02 13:08 ` [PATCH 2/2] posix: Remove alloca usage for internal fnmatch implementation Adhemerval Zanella

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).