unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Huang Shijie via Libc-alpha <libc-alpha@sourceware.org>
To: carlos@systemhalted.org
Cc: Huang Shijie <shijie@os.amperecomputing.com>,
	zwang@amperecomputing.com, patches@amperecomputing.com,
	libc-alpha@sourceware.org
Subject: [PATCH] Add LD_NUMA_REPLICATION for glibc
Date: Fri,  3 Sep 2021 12:14:34 +0000	[thread overview]
Message-ID: <20210903121434.12162-1-shijie@os.amperecomputing.com> (raw)

This patch adds the LD_NUMA_REPLICATION environment variable, which influences how ld.so maps shared libraries at run time.

If LD_NUMA_REPLICATION is set for program foo like this:
	#LD_NUMA_REPLICATION=1 ./foo

At the time ld.so mmaps the shared libraries, it will use
	mmap(, c->prot | PROT_WRITE, MAP_COPY | MAP_FILE | MAP_POPULATE,)
for them, and the mmap will trigger COW (copy on write) for the shared libraries
on the NUMA node on which the program `foo` runs. After the COW, foo will have a copy of
the shared library segment (the part covered by the mmap) which belongs to the same NUMA node.

So enabling LD_NUMA_REPLICATION will consume more memory,
but it will reduce remote memory accesses on NUMA systems.

Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
---
 elf/dl-map-segments.h      | 28 ++++++++++++++++++++++++----
 elf/dl-support.c           |  4 ++++
 elf/rtld.c                 |  4 ++++
 sysdeps/generic/ldsodefs.h |  4 ++++
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/elf/dl-map-segments.h b/elf/dl-map-segments.h
index f9fb110e..ae6661a7 100644
--- a/elf/dl-map-segments.h
+++ b/elf/dl-map-segments.h
@@ -52,13 +52,33 @@ _dl_map_segments (struct link_map *l, int fd,
                                   c->mapstart & GLRO(dl_use_load_bias))
            - MAP_BASE_ADDR (l));
 
-      /* Remember which part of the address space this object uses.  */
-      l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
+      if (__glibc_unlikely(GLRO(dl_numa_replication)))
+      {
+	/* Trigger the linux kernel COW(copy on write) on purpose */
+        l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
+                                            c->prot|PROT_WRITE,
+                                            MAP_COPY|MAP_FILE|MAP_POPULATE,
+                                            fd, c->mapoff);
+        if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
+          return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+
+	/* Change back to c->prot if needed */
+	if (!(c->prot & PROT_WRITE))
+        {
+	  if (__mprotect((caddr_t)l->l_map_start, maplength, c->prot))
+            return DL_MAP_SEGMENTS_ERROR_MPROTECT;
+	}
+      }
+      else
+      {
+        /* Remember which part of the address space this object uses.  */
+        l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
                                             c->prot,
                                             MAP_COPY|MAP_FILE,
                                             fd, c->mapoff);
-      if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
-        return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+        if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
+          return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+      }
 
       l->l_map_end = l->l_map_start + maplength;
       l->l_addr = l->l_map_start - c->mapstart;
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 01557181..d2eb3164 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -79,6 +79,10 @@ const char *_dl_origin_path;
 /* Nonzero if runtime lookup should not update the .got/.plt.  */
 int _dl_bind_not;
 
+  /* Do we want to do the replication(by linux copy on write) for shared libraries in NUMA?
+   Only valid in the linux system. */
+int _dl_numa_replication;
+
 /* A dummy link map for the executable, used by dlopen to access the global
    scope.  We don't export any symbols ourselves, so this can be minimal.  */
 static struct link_map _dl_main_map =
diff --git a/elf/rtld.c b/elf/rtld.c
index d733359e..10378c00 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -2788,7 +2788,11 @@ process_envvars (struct dl_main_state *state)
 	      GLRO(dl_verbose) = 1;
 	      GLRO(dl_debug_mask) |= DL_DEBUG_PRELINK;
 	      GLRO(dl_trace_prelink) = &envline[17];
+	      break;
 	    }
+
+	  if (memcmp (envline, "NUMA_REPLICATION", 16) == 0)
+	    GLRO(dl_numa_replication) = true;
 	  break;
 
 	case 20:
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 9c152592..f6114522 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -569,6 +569,10 @@ struct rtld_global_ro
   /* Nonzero if runtime lookups should not update the .got/.plt.  */
   EXTERN int _dl_bind_not;
 
+  /* Do we want to do the replication(by linux copy on write) for shared libraries in NUMA?
+     Only valid in the linux system. */
+  EXTERN int _dl_numa_replication;
+
   /* Nonzero if references should be treated as weak during runtime
      linking.  */
   EXTERN int _dl_dynamic_weak;
-- 
2.30.2


             reply	other threads:[~2021-09-03  4:16 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-03 12:14 Huang Shijie via Libc-alpha [this message]
2021-09-03  6:28 ` [PATCH] Add LD_NUMA_REPLICATION for glibc Florian Weimer via Libc-alpha
2021-09-03 15:15   ` Huang Shijie via Libc-alpha
2021-09-03 22:16     ` Song Bao Hua (Barry Song) via Libc-alpha
2021-09-06  9:14       ` Huang Shijie via Libc-alpha
2021-09-09 10:19   ` Huang Shijie via Libc-alpha
2021-09-10 11:01     ` Florian Weimer via Libc-alpha
2021-09-13 14:40       ` Huang Shijie via Libc-alpha

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/libc/involved.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210903121434.12162-1-shijie@os.amperecomputing.com \
    --to=libc-alpha@sourceware.org \
    --cc=carlos@systemhalted.org \
    --cc=patches@amperecomputing.com \
    --cc=shijie@os.amperecomputing.com \
    --cc=zwang@amperecomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).