unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org>
To: libc-alpha@sourceware.org
Subject: [PATCH v2 2/2] linux: Optimize realpath stack usage
Date: Tue, 11 Aug 2020 17:07:15 -0300	[thread overview]
Message-ID: <20200811200715.3432505-2-adhemerval.zanella@linaro.org> (raw)
In-Reply-To: <20200811200715.3432505-1-adhemerval.zanella@linaro.org>

Changes from previous version:

  - Changed __resolve_path to assume 'resolved' has at least PATH_MAX.
  - Dropped 'stdlib: Enforce PATH_MAX on allocated realpath buffer'
    patch and make __resolve_path result a path larget than PATH_MAX
    if 'resolved' is NULL.
  - Use __fd_to_filename to read the procfs.
  - Remove the fstat/lstat check.

---

This optimizes the stack usage for success case (from ~8K to ~4k) and
where 'resolved' input buffer is not provided.  For ithe failure case
when the 'resolved' buffer is provided, it requires use the generic
strategy to find the path when EACESS or ENOENT is returned (this is
a GNU extension not defined in the standard).

Regarding syscalls usage, for a sucessful path without symlinks it
trades 2 syscalls (getcwd/lstat) for 3 (openat, readlink, and close).
Its is slighter better if the input contains multiple symlinks (where
Linux kernel tricks allows replace multiple lstats by only one
readlink).  For failure it depends whether the 'resolved' buffer is
provided, which will call the old strategy (and thus requiring more
syscalls in general).

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 include/stdlib.h                   |  12 +++
 stdlib/Makefile                    |   2 +-
 stdlib/canonicalize-internal.c     | 155 +++++++++++++++++++++++++++
 stdlib/canonicalize.c              | 161 +----------------------------
 stdlib/realpath.c                  |  43 ++++++++
 sysdeps/unix/sysv/linux/realpath.c |  53 ++++++++++
 6 files changed, 265 insertions(+), 161 deletions(-)
 create mode 100644 stdlib/canonicalize-internal.c
 create mode 100644 stdlib/realpath.c
 create mode 100644 sysdeps/unix/sysv/linux/realpath.c

diff --git a/include/stdlib.h b/include/stdlib.h
index ffcefd7b85..7ed9b14614 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -20,6 +20,14 @@
 
 # include <rtld-malloc.h>
 
+# ifndef PATH_MAX
+#  ifdef MAXPATHLEN
+#   define PATH_MAX MAXPATHLEN
+#  else
+#   define PATH_MAX 1024
+#  endif
+# endif
+
 extern __typeof (strtol_l) __strtol_l;
 extern __typeof (strtoul_l) __strtoul_l;
 extern __typeof (strtoll_l) __strtoll_l;
@@ -92,6 +100,10 @@ extern int __unsetenv (const char *__name) attribute_hidden;
 extern int __clearenv (void) attribute_hidden;
 extern char *__mktemp (char *__template) __THROW __nonnull ((1));
 extern char *__canonicalize_file_name (const char *__name);
+extern char *__resolve_path (const char *name, char *resolved)
+     attribute_hidden;
+extern char *__realpath_system (const char *name, char *resolved)
+     attribute_hidden;
 extern char *__realpath (const char *__name, char *__resolved);
 libc_hidden_proto (__realpath)
 extern int __ptsname_r (int __fd, char *__buf, size_t __buflen)
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 7093b8a584..35ca04541f 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -53,7 +53,7 @@ routines	:=							      \
 	strtof strtod strtold						      \
 	strtof_l strtod_l strtold_l					      \
 	strtof_nan strtod_nan strtold_nan				      \
-	system canonicalize						      \
+	system realpath canonicalize canonicalize-internal		      \
 	a64l l64a							      \
 	rpmatch strfmon strfmon_l getsubopt xpg_basename fmtmsg		      \
 	strtoimax strtoumax wcstoimax wcstoumax				      \
diff --git a/stdlib/canonicalize-internal.c b/stdlib/canonicalize-internal.c
new file mode 100644
index 0000000000..be65fa113f
--- /dev/null
+++ b/stdlib/canonicalize-internal.c
@@ -0,0 +1,155 @@
+/* Internal function for canonicalize absolute name of a given file.
+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <eloop-threshold.h>
+
+char *
+__resolve_path (const char *name, char *resolved)
+{
+  size_t path_max = PATH_MAX;
+  const char *start, *end;
+  char *rpath = resolved;
+  char *rpath_limit = rpath + path_max;
+  char *dest = resolved;
+  char extra_buf[PATH_MAX];
+  int num_links = 0;
+
+  if (name[0] != '/')
+    {
+      if (__getcwd (rpath, path_max) == NULL)
+	{
+	  rpath[0] = '\0';
+	  return NULL;
+	}
+      dest = __rawmemchr (rpath, '\0');
+    }
+  else
+    {
+      rpath[0] = '/';
+      dest = rpath + 1;
+    }
+
+  for (start = end = name; *start; start = end)
+    {
+      /* Skip sequence of multiple path-separators.  */
+      while (*start == '/')
+	++start;
+
+      /* Find end of path component.  */
+      for (end = start; *end && *end != '/'; ++end)
+	/* Nothing.  */;
+
+      if (end - start == 0)
+	break;
+      else if (end - start == 1 && start[0] == '.')
+	/* nothing */;
+      else if (end - start == 2 && start[0] == '.' && start[1] == '.')
+	{
+	  /* Back up to previous component, ignore if at root already.  */
+	  if (dest > rpath + 1)
+	    while ((--dest)[-1] != '/');
+	}
+      else
+	{
+	  struct stat64 st;
+
+	  if (dest[-1] != '/')
+	    *dest++ = '/';
+	  if (dest + (end - start) >= rpath_limit)
+	    {
+	      if (resolved)
+		{
+		  __set_errno (ENAMETOOLONG);
+		  if (dest > rpath + 1)
+		    dest--;
+		  *dest = '\0';
+		  return NULL;
+		}
+	      ptrdiff_t dest_offset = dest - rpath;
+	      size_t new_size = rpath_limit - rpath;
+	      if (end - start + 1 > PATH_MAX)
+		new_size += end - start + 1;
+	      else
+		new_size += PATH_MAX;
+	      char *new_rpath = (char *) realloc (rpath, new_size);
+	      if (new_rpath == NULL)
+		return NULL;
+
+	      rpath = new_rpath;
+	      rpath_limit = rpath + new_size;
+	      path_max = new_size;
+
+	      dest = rpath + dest_offset;
+	    }
+
+	  dest = __mempcpy (dest, start, end - start);
+	  *dest = '\0';
+
+	  if (__lstat64 (rpath, &st) < 0)
+	    return NULL;
+
+	  if (S_ISLNK (st.st_mode))
+	    {
+	      if (++num_links > __eloop_threshold ())
+		{
+		  __set_errno (ELOOP);
+		  return NULL;
+		}
+
+	      char buf[PATH_MAX];
+	      ssize_t n = __readlink (rpath, buf, sizeof (buf) - 1);
+	      if (n < 0)
+		return NULL;
+	      buf[n] = '\0';
+
+	      size_t len = strlen (end);
+	      if (path_max - n <= len)
+		{
+		  __set_errno (ENAMETOOLONG);
+		  return NULL;
+		}
+
+	      memmove (&extra_buf[n], end, len + 1);
+	      name = end = memcpy (extra_buf, buf, n);
+
+	      if (buf[0] == '/')
+		dest = rpath + 1;	/* It's an absolute symlink */
+	      else
+		/* Back up to previous component, ignore if at root already: */
+		if (dest > rpath + 1)
+		  while ((--dest)[-1] != '/');
+	    }
+	  else if (!S_ISDIR (st.st_mode) && *end != '\0')
+	    {
+	      __set_errno (ENOTDIR);
+	      return NULL;
+	    }
+	}
+    }
+
+  if (dest > rpath + 1 && dest[-1] == '/')
+    --dest;
+  *dest = '\0';
+
+  return rpath;
+}
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index 554ba221e4..f4ab528a15 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -16,26 +16,11 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <assert.h>
 #include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/stat.h>
 #include <errno.h>
-#include <stddef.h>
 
-#include <eloop-threshold.h>
 #include <shlib-compat.h>
 
-#ifndef PATH_MAX
-# ifdef MAXPATHLEN
-#  define PATH_MAX MAXPATHLEN
-# else
-#  define PATH_MAX 1024
-# endif
-#endif
-
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any `.', `..' components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -50,10 +35,6 @@
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, extra_buf[PATH_MAX];
-  const char *start, *end, *rpath_limit;
-  int num_links = 0;
-
   if (name == NULL)
     {
       /* As per Single Unix Specification V2 we must return an error if
@@ -72,147 +53,7 @@ __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-  if (resolved == NULL)
-    {
-      rpath = malloc (PATH_MAX);
-      if (rpath == NULL)
-	return NULL;
-    }
-  else
-    rpath = resolved;
-  rpath_limit = rpath + PATH_MAX;
-
-  if (name[0] != '/')
-    {
-      if (!__getcwd (rpath, PATH_MAX))
-	{
-	  rpath[0] = '\0';
-	  goto error;
-	}
-      dest = __rawmemchr (rpath, '\0');
-    }
-  else
-    {
-      rpath[0] = '/';
-      dest = rpath + 1;
-    }
-
-  for (start = end = name; *start; start = end)
-    {
-      struct stat64 st;
-      int n;
-
-      /* Skip sequence of multiple path-separators.  */
-      while (*start == '/')
-	++start;
-
-      /* Find end of path component.  */
-      for (end = start; *end && *end != '/'; ++end)
-	/* Nothing.  */;
-
-      if (end - start == 0)
-	break;
-      else if (end - start == 1 && start[0] == '.')
-	/* nothing */;
-      else if (end - start == 2 && start[0] == '.' && start[1] == '.')
-	{
-	  /* Back up to previous component, ignore if at root already.  */
-	  if (dest > rpath + 1)
-	    while ((--dest)[-1] != '/');
-	}
-      else
-	{
-	  size_t new_size;
-
-	  if (dest[-1] != '/')
-	    *dest++ = '/';
-
-	  if (dest + (end - start) >= rpath_limit)
-	    {
-	      ptrdiff_t dest_offset = dest - rpath;
-	      char *new_rpath;
-
-	      if (resolved)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  if (dest > rpath + 1)
-		    dest--;
-		  *dest = '\0';
-		  goto error;
-		}
-	      new_size = rpath_limit - rpath;
-	      if (end - start + 1 > PATH_MAX)
-		new_size += end - start + 1;
-	      else
-		new_size += PATH_MAX;
-	      new_rpath = (char *) realloc (rpath, new_size);
-	      if (new_rpath == NULL)
-		goto error;
-	      rpath = new_rpath;
-	      rpath_limit = rpath + new_size;
-
-	      dest = rpath + dest_offset;
-	    }
-
-	  dest = __mempcpy (dest, start, end - start);
-	  *dest = '\0';
-
-	  if (__lxstat64 (_STAT_VER, rpath, &st) < 0)
-	    goto error;
-
-	  if (S_ISLNK (st.st_mode))
-	    {
-	      char buf[PATH_MAX];
-	      size_t len;
-
-	      if (++num_links > __eloop_threshold ())
-		{
-		  __set_errno (ELOOP);
-		  goto error;
-		}
-
-	      n = __readlink (rpath, buf, sizeof (buf) - 1);
-	      if (n < 0)
-		goto error;
-	      buf[n] = '\0';
-
-	      len = strlen (end);
-	      if (PATH_MAX - n <= len)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  goto error;
-		}
-
-	      /* Careful here, end may be a pointer into extra_buf... */
-	      memmove (&extra_buf[n], end, len + 1);
-	      name = end = memcpy (extra_buf, buf, n);
-
-	      if (buf[0] == '/')
-		dest = rpath + 1;	/* It's an absolute symlink */
-	      else
-		/* Back up to previous component, ignore if at root already: */
-		if (dest > rpath + 1)
-		  while ((--dest)[-1] != '/');
-	    }
-	  else if (!S_ISDIR (st.st_mode) && *end != '\0')
-	    {
-	      __set_errno (ENOTDIR);
-	      goto error;
-	    }
-	}
-    }
-  if (dest > rpath + 1 && dest[-1] == '/')
-    --dest;
-  *dest = '\0';
-
-  assert (resolved == NULL || resolved == rpath);
-  return rpath;
-
-error:
-  assert (resolved == NULL || resolved == rpath);
-  if (resolved == NULL)
-    free (rpath);
-  return NULL;
+  return __realpath_system (name, resolved);
 }
 libc_hidden_def (__realpath)
 versioned_symbol (libc, __realpath, realpath, GLIBC_2_3);
diff --git a/stdlib/realpath.c b/stdlib/realpath.c
new file mode 100644
index 0000000000..d482f900d0
--- /dev/null
+++ b/stdlib/realpath.c
@@ -0,0 +1,43 @@
+/* Return the canonical absolute name of a given file.
+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <shlib-compat.h>
+
+char *
+__realpath_system (const char *name, char *resolved)
+{
+  bool resolved_malloc = false;
+  if (resolved == NULL)
+    {
+      resolved = malloc (PATH_MAX);
+      if (resolved == NULL)
+	return NULL;
+      resolved_malloc = true;
+    }
+
+  char *r = __resolve_path (name, resolved, PATH_MAX);
+  if (r == NULL)
+    {
+      if (resolved_malloc)
+      	free (resolved);
+      return NULL;
+    }
+  return r;
+}
diff --git a/sysdeps/unix/sysv/linux/realpath.c b/sysdeps/unix/sysv/linux/realpath.c
new file mode 100644
index 0000000000..0b141e3103
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/realpath.c
@@ -0,0 +1,53 @@
+/* Return the canonical absolute name of a given file.  Linux version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <not-cancel.h>
+#include <fd_to_filename.h>
+#include <shlib-compat.h>
+
+char *
+__realpath_system (const char *name, char *resolved)
+{
+  int fd = __open64_nocancel (name, O_PATH | O_NONBLOCK | O_CLOEXEC);
+  if (fd == -1)
+    {
+      /* If the call fails with either EACCES or ENOENT and resolved_path is
+	 not NULL, then the prefix of path that is not readable or does not
+	 exist is returned in resolved_path.  This is a GNU extension.  */
+      if (resolved != NULL)
+	__resolve_path (name, resolved);
+      return NULL;
+    }
+
+  struct fd_to_filename fdfilename;
+  char path[PATH_MAX];
+
+  char *procname = __fd_to_filename (fd, &fdfilename);
+  ssize_t len = __readlink (procname, path, sizeof (path) - 1);
+  if (len < 0)
+    {
+      __close_nocancel_nostatus (fd);
+      return NULL;
+    }
+  path[len] = '\0';
+  __close_nocancel_nostatus (fd);
+
+  return resolved != NULL ? strcpy (resolved, path) : __strdup (path);
+}
-- 
2.25.1


      reply	other threads:[~2020-08-11 20:07 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-11 20:07 [PATCH v2 1/2] stdlib: Use fixed buffer size for realpath (BZ #26341) Adhemerval Zanella via Libc-alpha
2020-08-11 20:07 ` Adhemerval Zanella via Libc-alpha [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/libc/involved.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200811200715.3432505-2-adhemerval.zanella@linaro.org \
    --to=libc-alpha@sourceware.org \
    --cc=adhemerval.zanella@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).