* [PATCH] malloc: Use correct C11 atomics for fastbin
@ 2022-11-21 16:09 Wilco Dijkstra via Libc-alpha
  2022-11-21 16:18 ` Florian Weimer via Libc-alpha
  2022-12-02  5:11 ` DJ Delorie via Libc-alpha
  0 siblings, 2 replies; 23+ messages in thread
From: Wilco Dijkstra via Libc-alpha @ 2022-11-21 16:09 UTC (permalink / raw)
  To: 'GNU C Library'; +Cc: Florian Weimer


Fix memory ordering issues in the fastbin implementation: in REMOVE_FB the
next pointer is read before any MO synchronization; in the C11 atomic model
this is only correct after a load acquire.  Refactor the fastbin code and
add dedicated fastbin_push_chunk/fastbin_pop_chunk implementations.  The
number of acquire or release atomics remains the same, and the new functions
are inlined, so performance is unaffected.
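
To make the pairing concrete, here is a minimal standalone sketch using
plain C11 <stdatomic.h> (illustrative only: the node type and names are
made up, and the actual patch uses glibc's internal atomic_* wrappers):

#include <stdatomic.h>
#include <stddef.h>

struct node { struct node *next; };
static _Atomic (struct node *) top;

static void
push (struct node *p)
{
  struct node *head = atomic_load_explicit (&top, memory_order_relaxed);
  do
    /* The release CAS below publishes this store to acquire loaders.  */
    p->next = head;
  while (!atomic_compare_exchange_weak_explicit (&top, &head, p,
						 memory_order_release,
						 memory_order_relaxed));
}

static struct node *
pop (void)
{
  struct node *head, *next;
  do
    {
      /* Acquire load pairs with the release CAS in push, making the
	 read of head->next below valid.  */
      head = atomic_load_explicit (&top, memory_order_acquire);
      if (head == NULL)
	return NULL;
      next = head->next;
    }
  while (!atomic_compare_exchange_weak_explicit (&top, &head, next,
						 memory_order_relaxed,
						 memory_order_relaxed));
  return head;
}

As in the patch, the pop CAS can be relaxed because the acquire load in
the retry loop re-establishes synchronization; pop is only ABA-safe when
concurrent poppers are excluded (in the patch, by the arena lock).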

Passes regress, OK for commit?

---

diff --git a/malloc/malloc.c b/malloc/malloc.c
index 2a61c8b5ee38f33c42c72f3c5ad441e26ed0e701..a3e7053d2db81cce211dd8c3cff43af0f59a1b71 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1786,6 +1786,73 @@ get_max_fast (void)
   return global_max_fast;
 }
 
+
+/* Atomically push onto the fastbin list.  Return the previous head of the
+   list; it may only be dereferenced while the arena lock is held.  */
+static __always_inline mchunkptr
+fastbin_push_chunk (mchunkptr *fastbin, mchunkptr p)
+{
+  mchunkptr head = atomic_load_relaxed (fastbin);
+
+  /* Check that the top of the bin is not the record we are going to
+     add (i.e., double free).  */
+  if (__builtin_expect (head == p, 0))
+    malloc_printerr ("double free or corruption (fasttop)");
+
+  if (SINGLE_THREAD_P)
+    {
+      p->fd = PROTECT_PTR (&p->fd, head);
+      *fastbin = p;
+      return head;
+    }
+
+  /* Atomically update the fastbin head.  Use release MO to synchronize
+     with acquire reads in fastbin_pop_chunk and malloc_consolidate (this
+     ensures the next pointer update is valid).  */
+  do
+    {
+      p->fd = PROTECT_PTR (&p->fd, head);
+    }
+  while (!atomic_compare_exchange_weak_release (fastbin, &head, p));
+
+  return head;
+}
+
+
+/* Atomically pop from the fastbin list.  The arena lock must be held to
+   block other threads removing entries, avoiding the ABA issue.  */
+static __always_inline mchunkptr
+fastbin_pop_chunk (mchunkptr *fastbin)
+{
+  mchunkptr head, tail;
+
+  if (SINGLE_THREAD_P)
+    {
+      head = *fastbin;
+      if (head == NULL)
+	return head;
+      if (__glibc_unlikely (misaligned_chunk (head)))
+	malloc_printerr ("malloc(): unaligned fastbin chunk detected");
+      *fastbin = REVEAL_PTR (head->fd);
+      return head;
+    }
+
+  do
+    {
+      /* Synchronize with the release MO CAS in fastbin_push_chunk - this
+	 ensures the next pointer is valid.  */
+      head = atomic_load_acquire (fastbin);
+      if (head == NULL)
+	return head;
+      if (__glibc_unlikely (misaligned_chunk (head)))
+	malloc_printerr ("malloc(): unaligned fastbin chunk detected");
+      tail = REVEAL_PTR (head->fd);
+    }
+  while (!atomic_compare_exchange_weak_relaxed (fastbin, &head, tail));
+
+  return head;
+}
+
 /*
    ----------- Internal state representation and initialization -----------
  */
@@ -3798,71 +3865,37 @@ _int_malloc (mstate av, size_t bytes)
      can try it without checking, which saves some time on this fast path.
    */
 
-#define REMOVE_FB(fb, victim, pp)			\
-  do							\
-    {							\
-      victim = pp;					\
-      if (victim == NULL)				\
-	break;						\
-      pp = REVEAL_PTR (victim->fd);                                     \
-      if (__glibc_unlikely (pp != NULL && misaligned_chunk (pp)))       \
-	malloc_printerr ("malloc(): unaligned fastbin chunk detected"); \
-    }							\
-  while ((pp = catomic_compare_and_exchange_val_acq (fb, pp, victim)) \
-	 != victim);					\
-
   if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
     {
       idx = fastbin_index (nb);
       mfastbinptr *fb = &fastbin (av, idx);
-      mchunkptr pp;
-      victim = *fb;
+      victim = fastbin_pop_chunk (fb);
 
       if (victim != NULL)
 	{
-	  if (__glibc_unlikely (misaligned_chunk (victim)))
-	    malloc_printerr ("malloc(): unaligned fastbin chunk detected 2");
-
-	  if (SINGLE_THREAD_P)
-	    *fb = REVEAL_PTR (victim->fd);
-	  else
-	    REMOVE_FB (fb, pp, victim);
-	  if (__glibc_likely (victim != NULL))
-	    {
-	      size_t victim_idx = fastbin_index (chunksize (victim));
-	      if (__builtin_expect (victim_idx != idx, 0))
-		malloc_printerr ("malloc(): memory corruption (fast)");
-	      check_remalloced_chunk (av, victim, nb);
+	  size_t victim_idx = fastbin_index (chunksize (victim));
+	  if (__builtin_expect (victim_idx != idx, 0))
+	    malloc_printerr ("malloc(): memory corruption (fast)");
+	  check_remalloced_chunk (av, victim, nb);
 #if USE_TCACHE
-	      /* While we're here, if we see other chunks of the same size,
-		 stash them in the tcache.  */
-	      size_t tc_idx = csize2tidx (nb);
-	      if (tcache && tc_idx < mp_.tcache_bins)
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  size_t tc_idx = csize2tidx (nb);
+	  if (tcache && tc_idx < mp_.tcache_bins)
+	    {
+	      /* While bin not empty and tcache not full, copy chunks.  */
+	      while (tcache->counts[tc_idx] < mp_.tcache_count)
 		{
-		  mchunkptr tc_victim;
-
-		  /* While bin not empty and tcache not full, copy chunks.  */
-		  while (tcache->counts[tc_idx] < mp_.tcache_count
-			 && (tc_victim = *fb) != NULL)
-		    {
-		      if (__glibc_unlikely (misaligned_chunk (tc_victim)))
-			malloc_printerr ("malloc(): unaligned fastbin chunk detected 3");
-		      if (SINGLE_THREAD_P)
-			*fb = REVEAL_PTR (tc_victim->fd);
-		      else
-			{
-			  REMOVE_FB (fb, pp, tc_victim);
-			  if (__glibc_unlikely (tc_victim == NULL))
-			    break;
-			}
-		      tcache_put (tc_victim, tc_idx);
-		    }
+		  mchunkptr tc_victim = fastbin_pop_chunk (fb);
+		  if (tc_victim == NULL)
+		    break;
+		  tcache_put (tc_victim, tc_idx);
 		}
-#endif
-	      void *p = chunk2mem (victim);
-	      alloc_perturb (p, bytes);
-	      return p;
 	    }
+#endif
+	  void *p = chunk2mem (victim);
+	  alloc_perturb (p, bytes);
+	  return p;
 	}
     }
 
@@ -4505,29 +4538,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     fb = &fastbin (av, idx);
 
     /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
-    mchunkptr old = *fb, old2;
-
-    if (SINGLE_THREAD_P)
-      {
-	/* Check that the top of the bin is not the record we are going to
-	   add (i.e., double free).  */
-	if (__builtin_expect (old == p, 0))
-	  malloc_printerr ("double free or corruption (fasttop)");
-	p->fd = PROTECT_PTR (&p->fd, old);
-	*fb = p;
-      }
-    else
-      do
-	{
-	  /* Check that the top of the bin is not the record we are going to
-	     add (i.e., double free).  */
-	  if (__builtin_expect (old == p, 0))
-	    malloc_printerr ("double free or corruption (fasttop)");
-	  old2 = old;
-	  p->fd = PROTECT_PTR (&p->fd, old);
-	}
-      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
-	     != old2);
+    mchunkptr old = fastbin_push_chunk (fb, p);
 
     /* Check that size of fastbin chunk at the top is the same as
        size of the chunk that we are adding.  We can dereference OLD
@@ -4718,6 +4729,7 @@ static void malloc_consolidate(mstate av)
   maxfb = &fastbin (av, NFASTBINS - 1);
   fb = &fastbin (av, 0);
   do {
+    /* Synchronize with the release MO CAS in fastbin_push_chunk.  */
     p = atomic_exchange_acquire (fb, NULL);
     if (p != 0) {
       do {

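For reference, the ABA interleaving that the arena-lock requirement in
fastbin_pop_chunk rules out can be drawn as a timeline (illustrative only,
not glibc code; chunk names are made up):

/* Fastbin initially A -> B -> C via fd links.

   Thread 1 (pop)                  Thread 2 (pop/push)
   --------------                  -------------------
   head = A  (acquire load)
   tail = REVEAL_PTR (A->fd) = B
                                   pop A, pop B, push A back;
                                   the bin is now A -> C
   CAS (fb, A, B) succeeds:
   the bin head becomes B, a
   chunk no longer on the list.

   With the arena lock held by every popper, nothing can be removed
   between thread 1's load and its CAS, so A->fd cannot go stale.
   Concurrent pushes from free remain harmless: they only change the
   head, which makes the weak CAS retry.  */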

* [PATCH] malloc: Use correct C11 atomics for fastbin
@ 2022-12-06 15:04 Wilco Dijkstra via Libc-alpha
  0 siblings, 0 replies; 23+ messages in thread
From: Wilco Dijkstra via Libc-alpha @ 2022-12-06 15:04 UTC (permalink / raw)
  To: zack@owlfolio.org, Adhemerval Zanella; +Cc: 'GNU C Library'

Hi,

>> Modern allocators are not only much faster than GLIBC in their default settings
>>> but also have lower memory usage. The two best allocators seem to be mimalloc
>>> and jemalloc.
>>
>>
>> Do they have compatible license so we can just import/adapt the code?
>
> Per https://github.com/jemalloc/jemalloc/blob/dev/COPYING jemalloc is 2-clause BSD.
> I don't know where to find mimalloc off the top of my head.

Mimalloc uses the MIT license: https://github.com/microsoft/mimalloc/blob/master/LICENSE

Cheers,
Wilco


Thread overview: 23+ messages
2022-11-21 16:09 [PATCH] malloc: Use correct C11 atomics for fastbin Wilco Dijkstra via Libc-alpha
2022-11-21 16:18 ` Florian Weimer via Libc-alpha
2022-11-21 16:55   ` Wilco Dijkstra via Libc-alpha
2022-11-21 16:56     ` Wilco Dijkstra via Libc-alpha
2022-11-21 17:00     ` Florian Weimer via Libc-alpha
2022-12-02  5:11 ` DJ Delorie via Libc-alpha
2022-12-02  6:36   ` Florian Weimer via Libc-alpha
2022-12-02 10:56     ` Wilco Dijkstra via Libc-alpha
2022-12-02 11:24       ` Florian Weimer via Libc-alpha
2022-12-02 12:02         ` Wilco Dijkstra via Libc-alpha
2022-12-02 18:55           ` DJ Delorie via Libc-alpha
2022-12-05 18:39             ` Zack Weinberg via Libc-alpha
2022-12-06 16:19               ` DJ Delorie via Libc-alpha
2022-12-12  3:35                 ` Zack Weinberg via Libc-alpha
2022-12-12 11:57                   ` Florian Weimer via Libc-alpha
2022-12-12 11:56                 ` Florian Weimer via Libc-alpha
2022-12-06 13:29             ` Wilco Dijkstra via Libc-alpha
2022-12-06 13:37               ` Adhemerval Zanella Netto via Libc-alpha
2022-12-06 14:31                 ` Zack Weinberg via Libc-alpha
2022-12-06 16:23               ` DJ Delorie via Libc-alpha
2022-12-15 15:43                 ` Wilco Dijkstra via Libc-alpha
2022-12-02 18:55     ` DJ Delorie via Libc-alpha
2022-12-06 15:04 Wilco Dijkstra via Libc-alpha
