unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org>
To: libc-alpha@sourceware.org,
	Cupertino Miranda <cupertino.miranda@oracle.com>,
	Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Subject: [PATCH] nptl: Disable THP on thread stack if it incurs in large RSS usage
Date: Thu, 20 Apr 2023 14:24:36 -0300	[thread overview]
Message-ID: <20230420172436.2013698-1-adhemerval.zanella@linaro.org> (raw)

If Transparent Huge Page (THP) is set to 'always', the thread stack might
be backed by Huge Pages depending of the asigned address by the kernel and
the resulting guard position.  If the guard page is within the same large
page that might be used by the stack itself, changing stack permission
will make the allocated range no longer be server with THP.  The kernel
will then revert back using default page size.

In this case, besides the aditional work, the kernel will need to potential
keep all the pages since it can not distinguish which one was really touched
by the process.  This result in a large RSS usage than just madvise the range
to not use huge pages.

The glibc.pthread.stack_hugetlb is still useful for the case where the
caller either setup no guard page or a guard page multiple of default THP
size.  In this case, the kernel might potentially backup the stack with
THP, but it would be up to the thread stack profile if THP is benefitial
or not.

Checked on x86_64-linux-gnu.
---
 nptl/allocatestack.c                       | 34 ++++++++++++++++
 sysdeps/generic/malloc-hugepages.h         |  1 +
 sysdeps/unix/sysv/linux/malloc-hugepages.c | 46 ++++++++++++++++++----
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index f9d8cdfd08..1eb34f816c 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -33,6 +33,7 @@
 #include <nptl-stack.h>
 #include <libc-lock.h>
 #include <tls-internal.h>
+#include <malloc-hugepages.h>
 
 /* Default alignment of stack.  */
 #ifndef STACK_ALIGN
@@ -206,6 +207,31 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize)
 #endif
 }
 
+/* If Transparent Huge Page (THP) is set to 'always', the thread stack might
+   be backed by Huge Pages depending of the asigned address by the kernel and
+   if resulting guard position.  If the guard page is within the same large
+   page that might be used by the stack itself, changing stack permission
+   will make the allocated range no longer be server with THP.  The kernel will
+   then revert back using default page size.
+
+   In this case, besides the aditional work, the kernel will need to potential
+   keep all the pages since it can distinguish which one was really touched by
+   the process.  This result in a large RSS usage than just madvise the range
+   to not use huge pages.  */
+static __always_inline int
+advise_thp (void *mem, size_t size, char *guard)
+{
+  enum malloc_thp_mode_t thpmode = __malloc_thp_mode ();
+  if (thpmode != malloc_thp_mode_always)
+    return 0;
+
+  unsigned long int thpsize = __malloc_default_thp_pagesize ();
+  if (PTR_ALIGN_DOWN (mem, thpsize) != PTR_ALIGN_DOWN (guard, thpsize))
+    return 0;
+
+  return __madvise (mem, size, MADV_NOHUGEPAGE);
+}
+
 /* Returns a usable stack for a new thread either by allocating a
    new stack or reusing a cached stack of sufficient size.
    ATTR must be non-NULL and point to a valid pthread_attr.
@@ -396,6 +422,14 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 	    {
 	      char *guard = guard_position (mem, size, guardsize, pd,
 					    pagesize_m1);
+
+	      if (__glibc_unlikely (__nptl_stack_hugetlb == 1))
+		{
+		  int r = advise_thp (mem, size, guard);
+		  if (r != 0)
+		    return r;
+		}
+
 	      if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0)
 		{
 		  __munmap (mem, size);
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
index d68b85630c..21d4844bc4 100644
--- a/sysdeps/generic/malloc-hugepages.h
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -26,6 +26,7 @@ unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden;
 
 enum malloc_thp_mode_t
 {
+  malloc_thp_mode_unknown,
   malloc_thp_mode_always,
   malloc_thp_mode_madvise,
   malloc_thp_mode_never,
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
index 2f316474c1..e7877f098e 100644
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -22,19 +22,33 @@
 #include <not-cancel.h>
 #include <sys/mman.h>
 
+/* The __malloc_thp_mode is called only in single-thread mode, either in
+   malloc initialization or pthread creation.  */
+static unsigned long int thp_pagesize = -1;
+
 unsigned long int
 __malloc_default_thp_pagesize (void)
 {
+  unsigned long int size = atomic_load_relaxed (&thp_pagesize);
+  if (size != -1)
+    return size;
+
   int fd = __open64_nocancel (
     "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY);
   if (fd == -1)
-    return 0;
+    {
+      atomic_store_relaxed (&thp_pagesize, 0);
+      return 0;
+    }
 
   char str[INT_BUFSIZE_BOUND (unsigned long int)];
   ssize_t s = __read_nocancel (fd, str, sizeof (str));
   __close_nocancel (fd);
   if (s < 0)
-    return 0;
+    {
+      atomic_store_relaxed (&thp_pagesize, 0);
+      return 0;
+    }
 
   unsigned long int r = 0;
   for (ssize_t i = 0; i < s; i++)
@@ -44,16 +58,28 @@ __malloc_default_thp_pagesize (void)
       r *= 10;
       r += str[i] - '0';
     }
+  atomic_store_relaxed (&thp_pagesize, r);
   return r;
 }
 
+/* The __malloc_thp_mode is called only in single-thread mode, either in
+   malloc initialization or pthread creation.  */
+static enum malloc_thp_mode_t thp_mode = malloc_thp_mode_unknown;
+
 enum malloc_thp_mode_t
 __malloc_thp_mode (void)
 {
+  enum malloc_thp_mode_t mode = atomic_load_relaxed (&thp_mode);
+  if (mode != malloc_thp_mode_unknown)
+    return mode;
+
   int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled",
 			      O_RDONLY);
   if (fd == -1)
-    return malloc_thp_mode_not_supported;
+    {
+      atomic_store_relaxed (&thp_mode, malloc_thp_mode_not_supported);
+      return malloc_thp_mode_not_supported;
+    }
 
   static const char mode_always[]  = "[always] madvise never\n";
   static const char mode_madvise[] = "always [madvise] never\n";
@@ -69,13 +95,19 @@ __malloc_thp_mode (void)
   if (s == sizeof (mode_always) - 1)
     {
       if (strcmp (str, mode_always) == 0)
-	return malloc_thp_mode_always;
+	mode = malloc_thp_mode_always;
       else if (strcmp (str, mode_madvise) == 0)
-	return malloc_thp_mode_madvise;
+	mode = malloc_thp_mode_madvise;
       else if (strcmp (str, mode_never) == 0)
-	return malloc_thp_mode_never;
+	mode = malloc_thp_mode_never;
+      else
+	mode = malloc_thp_mode_not_supported;
     }
-  return malloc_thp_mode_not_supported;
+  else
+    mode = malloc_thp_mode_not_supported;
+
+  atomic_store_relaxed (&thp_mode, mode);
+  return mode;
 }
 
 static size_t
-- 
2.34.1


             reply	other threads:[~2023-04-20 17:25 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-20 17:24 Adhemerval Zanella via Libc-alpha [this message]
2023-05-03 12:42 ` [PATCH] nptl: Disable THP on thread stack if it incurs in large RSS usage Wilco Dijkstra via Libc-alpha
2023-05-15 17:57   ` Adhemerval Zanella Netto via Libc-alpha
2023-05-16 15:38     ` Wilco Dijkstra via Libc-alpha
2023-05-16 16:35       ` Adhemerval Zanella Netto via Libc-alpha
2023-05-17 12:49         ` Wilco Dijkstra via Libc-alpha
2023-05-17 13:12           ` Cupertino Miranda via Libc-alpha
2023-05-17 13:20           ` Adhemerval Zanella Netto via Libc-alpha
2023-05-17 14:22             ` Wilco Dijkstra via Libc-alpha
2023-05-17 16:50               ` Adhemerval Zanella Netto via Libc-alpha
2023-05-17 18:16                 ` Wilco Dijkstra via Libc-alpha
2023-05-18 13:04                   ` Adhemerval Zanella Netto via Libc-alpha
2023-05-23  9:48                     ` Wilco Dijkstra via Libc-alpha
2024-01-31  2:03                       ` Cristian Rodríguez
2024-01-31  7:54                         ` Florian Weimer
2024-01-31 11:30                           ` Adhemerval Zanella Netto
2024-01-31 11:43                             ` Florian Weimer
2024-03-12  0:55                               ` Cristian Rodríguez
2024-01-31 15:18                             ` Cristian Rodríguez
2024-02-01  1:26                               ` Cristian Rodríguez
2023-05-16 14:30 ` Cupertino Miranda via Libc-alpha

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/libc/involved.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230420172436.2013698-1-adhemerval.zanella@linaro.org \
    --to=libc-alpha@sourceware.org \
    --cc=Wilco.Dijkstra@arm.com \
    --cc=adhemerval.zanella@linaro.org \
    --cc=cupertino.miranda@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).