From: Huang Shijie via Libc-alpha <libc-alpha@sourceware.org>
To: carlos@systemhalted.org
Cc: Huang Shijie <shijie@os.amperecomputing.com>,
zwang@amperecomputing.com, patches@amperecomputing.com,
libc-alpha@sourceware.org
Subject: [PATCH] Add LD_NUMA_REPLICATION for glibc
Date: Fri, 3 Sep 2021 12:14:34 +0000 [thread overview]
Message-ID: <20210903121434.12162-1-shijie@os.amperecomputing.com> (raw)
This patch adds LD_NUMA_REPLICATION, which influences how shared libraries are mapped at run time.
If LD_NUMA_REPLICATION is set for program foo like this:
#LD_NUMA_REPLICATION=1 ./foo
When ld.so mmaps the shared libraries, it will use
mmap(, c->prot | PROT_WRITE, MAP_COPY | MAP_FILE | MAP_POPULATE,)
for them, and the mmap will trigger COW (copy-on-write) for the shared libraries
on the NUMA node where the program `foo` runs. After the COW, `foo` will have its own copy of
the shared library segments (those covered by the mmap), residing on that same NUMA node.
So enabling LD_NUMA_REPLICATION will consume more memory,
but it will reduce remote memory accesses on NUMA systems.
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
---
elf/dl-map-segments.h | 28 ++++++++++++++++++++++++----
elf/dl-support.c | 4 ++++
elf/rtld.c | 4 ++++
sysdeps/generic/ldsodefs.h | 4 ++++
4 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/elf/dl-map-segments.h b/elf/dl-map-segments.h
index f9fb110e..ae6661a7 100644
--- a/elf/dl-map-segments.h
+++ b/elf/dl-map-segments.h
@@ -52,13 +52,33 @@ _dl_map_segments (struct link_map *l, int fd,
c->mapstart & GLRO(dl_use_load_bias))
- MAP_BASE_ADDR (l));
- /* Remember which part of the address space this object uses. */
- l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
+ if (__glibc_unlikely(GLRO(dl_numa_replication)))
+ {
+ /* Trigger the linux kernel COW(copy on write) on purpose */
+ l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
+ c->prot|PROT_WRITE,
+ MAP_COPY|MAP_FILE|MAP_POPULATE,
+ fd, c->mapoff);
+ if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
+ return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+
+ /* Change back to c->prot if needed */
+ if (!(c->prot & PROT_WRITE))
+ {
+ if (__mprotect((caddr_t)l->l_map_start, maplength, c->prot))
+ return DL_MAP_SEGMENTS_ERROR_MPROTECT;
+ }
+ }
+ else
+ {
+ /* Remember which part of the address space this object uses. */
+ l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
c->prot,
MAP_COPY|MAP_FILE,
fd, c->mapoff);
- if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
- return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+ if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
+ return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
+ }
l->l_map_end = l->l_map_start + maplength;
l->l_addr = l->l_map_start - c->mapstart;
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 01557181..d2eb3164 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -79,6 +79,10 @@ const char *_dl_origin_path;
/* Nonzero if runtime lookup should not update the .got/.plt. */
int _dl_bind_not;
+ /* Do we want to do the replication(by linux copy on write) for shared libraries in NUMA?
+ Only valid in the linux system. */
+int _dl_numa_replication;
+
/* A dummy link map for the executable, used by dlopen to access the global
scope. We don't export any symbols ourselves, so this can be minimal. */
static struct link_map _dl_main_map =
diff --git a/elf/rtld.c b/elf/rtld.c
index d733359e..10378c00 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -2788,7 +2788,11 @@ process_envvars (struct dl_main_state *state)
GLRO(dl_verbose) = 1;
GLRO(dl_debug_mask) |= DL_DEBUG_PRELINK;
GLRO(dl_trace_prelink) = &envline[17];
+ break;
}
+
+ if (memcmp (envline, "NUMA_REPLICATION", 16) == 0)
+ GLRO(dl_numa_replication) = true;
break;
case 20:
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 9c152592..f6114522 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -569,6 +569,10 @@ struct rtld_global_ro
/* Nonzero if runtime lookups should not update the .got/.plt. */
EXTERN int _dl_bind_not;
+ /* Do we want to do the replication(by linux copy on write) for shared libraries in NUMA?
+ Only valid in the linux system. */
+ EXTERN int _dl_numa_replication;
+
/* Nonzero if references should be treated as weak during runtime
linking. */
EXTERN int _dl_dynamic_weak;
--
2.30.2
next reply other threads:[~2021-09-03 4:16 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-03 12:14 Huang Shijie via Libc-alpha [this message]
2021-09-03 6:28 ` [PATCH] Add LD_NUMA_REPLICATION for glibc Florian Weimer via Libc-alpha
2021-09-03 15:15 ` Huang Shijie via Libc-alpha
2021-09-03 22:16 ` Song Bao Hua (Barry Song) via Libc-alpha
2021-09-06 9:14 ` Huang Shijie via Libc-alpha
2021-09-09 10:19 ` Huang Shijie via Libc-alpha
2021-09-10 11:01 ` Florian Weimer via Libc-alpha
2021-09-13 14:40 ` Huang Shijie via Libc-alpha
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/libc/involved.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210903121434.12162-1-shijie@os.amperecomputing.com \
--to=libc-alpha@sourceware.org \
--cc=carlos@systemhalted.org \
--cc=patches@amperecomputing.com \
--cc=shijie@os.amperecomputing.com \
--cc=zwang@amperecomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).