ruby-core@ruby-lang.org archive (unofficial mirror)
From: Daniel DeLorme <dan-ml@dan42.com>
To: ruby-core@ruby-lang.org
Subject: Re: Copy-on-write friendly garbage collector
Date: Sat, 8 Mar 2008 19:01:51 +0900	[thread overview]
Message-ID: <47D26413.5060708@dan42.com> (raw)
In-Reply-To: <47D24561.9090909@dan42.com>

[-- Attachment #1: Type: text/plain, Size: 843 bytes --]

Third email in a row... I hope that's not considered spamming ~_~

Since the patch was based on r15675, I tried to merge it plus my
changes into the trunk (r15730) but ran into some conflicts. While
resolving them, I realized that r15683 and r15684 (the apparent source
of the conflicts) sort the heap list by memory address so that the
heap containing a given object can be found with a binary search. That
sorting is unnecessary once a linear search is more efficient than a
bsearch (there are only ever a handful of heaps), so I removed it. New
heaps are now appended to the end of the list, and the list is
searched from last to first, i.e. from the newest (and largest) heap
to the oldest (and smallest).
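
For reference, the lookup now amounts to something like the following
sketch (simplified and self-contained for illustration; in the patch
itself this is find_heap_slot_for_object() in gc.c, with last_heap
acting as a one-entry cache):

    #include <stddef.h>

    /* Mirrors the relevant part of struct heaps_slot. */
    struct heap_slot {
        char *base;     /* first byte of the slot array */
        char *limit;    /* one byte past the last slot  */
    };

    static struct heap_slot *heaps;     /* appended in allocation order */
    static int heaps_used;
    static struct heap_slot *last_heap; /* cache of the most recent hit */

    static struct heap_slot *
    find_slot(const void *object)
    {
        const char *p = object;
        int i;

        /* Consecutive lookups usually hit the same heap, so try the cache. */
        if (last_heap && p >= last_heap->base && p < last_heap->limit)
            return last_heap;

        /* Otherwise walk from the newest (largest) heap back to the oldest. */
        for (i = heaps_used - 1; i >= 0; i--) {
            if (p >= heaps[i].base && p < heaps[i].limit) {
                last_heap = &heaps[i];
                return last_heap;
            }
        }
        return NULL;    /* not a pointer into any Ruby heap */
    }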

So now, for the following test case, which generates 9 heaps:
   GC.disable
   x=(1..1000000).map{"x"*10}
   GC.enable
   GC.start
The COW-friendly version is only 1-2% slower than r15730.
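
The copy-on-write benefit that this small overhead buys can be
demonstrated in isolation with a short standalone C program
(illustrative only, not part of the patch and independent of Ruby):
fork, "mark" a large heap in the child either by flagging each object
or by setting bits in an external bitmap, and compare how many
copy-on-write page faults the child takes.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/resource.h>
    #include <sys/wait.h>

    #define NOBJ 1000000L

    struct obj { long flags; char payload[32]; };

    int main(void)
    {
        size_t heap_bytes = NOBJ * sizeof(struct obj);
        struct obj *heap = malloc(heap_bytes);
        unsigned char *bitmap = calloc(NOBJ / 8 + 1, 1);
        int mode;

        if (!heap || !bitmap) return 1;
        memset(heap, 0, heap_bytes);   /* make the pages resident before forking */

        for (mode = 0; mode < 2; mode++) {
            pid_t pid = fork();
            if (pid == 0) {
                struct rusage ru;
                long i;
                if (mode == 0)                     /* mark bit in the object:  */
                    for (i = 0; i < NOBJ; i++)     /* one COW fault per page   */
                        heap[i].flags |= 1;
                else                               /* external mark table:     */
                    for (i = 0; i < NOBJ; i++)     /* only the bitmap dirtied  */
                        bitmap[i / 8] |= 1 << (i % 8);
                getrusage(RUSAGE_SELF, &ru);
                printf("%s: minor (COW) faults in child: %ld\n",
                       mode == 0 ? "in-object flag" : "bitmap", ru.ru_minflt);
                _exit(0);
            }
            waitpid(pid, NULL, 0);
        }
        return 0;
    }

On Linux the first child should report on the order of one fault per
page of the heap, while the second should stay close to the baseline;
that difference is exactly the effect of keeping the mark bits out of
the object headers.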

I hope this makes it into the trunk, but please do review the code.

--
Daniel

[-- Attachment #2: cow-friendly.patch --]
[-- Type: text/x-patch, Size: 15532 bytes --]

Index: debug.c
===================================================================
--- debug.c	(revision 15730)
+++ debug.c	(working copy)
@@ -29,8 +29,8 @@
         RUBY_ENC_CODERANGE_7BIT    = ENC_CODERANGE_7BIT,
         RUBY_ENC_CODERANGE_VALID   = ENC_CODERANGE_VALID,
         RUBY_ENC_CODERANGE_BROKEN  = ENC_CODERANGE_BROKEN, 
-        RUBY_FL_MARK        = FL_MARK,
-        RUBY_FL_RESERVED    = FL_RESERVED,
+        RUBY_FL_RESERVED0   = FL_RESERVED0,
+        RUBY_FL_RESERVED1   = FL_RESERVED1,
         RUBY_FL_FINALIZE    = FL_FINALIZE,
         RUBY_FL_TAINT       = FL_TAINT,
         RUBY_FL_EXIVAR      = FL_EXIVAR,
Index: include/ruby/ruby.h
===================================================================
--- include/ruby/ruby.h	(revision 15730)
+++ include/ruby/ruby.h	(working copy)
@@ -621,8 +621,8 @@
 #define RVALUES(obj) (R_CAST(RValues)(obj))
 
 #define FL_SINGLETON FL_USER0
-#define FL_MARK      (((VALUE)1)<<5)
-#define FL_RESERVED  (((VALUE)1)<<6) /* will be used in the future GC */
+#define FL_RESERVED0 (((VALUE)1)<<5) /* will be used in the future GC */
+#define FL_RESERVED1 (((VALUE)1)<<6) /* will be used in the future GC */
 #define FL_FINALIZE  (((VALUE)1)<<7)
 #define FL_TAINT     (((VALUE)1)<<8)
 #define FL_EXIVAR    (((VALUE)1)<<9)
@@ -716,6 +716,7 @@
 void rb_register_mark_object(VALUE);
 void rb_gc_register_address(VALUE*);
 void rb_gc_unregister_address(VALUE*);
+int rb_gc_is_thread_marked(VALUE);
 
 ID rb_intern(const char*);
 ID rb_intern2(const char*, long);
Index: gc.c
===================================================================
--- gc.c	(revision 15730)
+++ gc.c	(working copy)
@@ -22,8 +22,14 @@
 #include "gc.h"
 #include <stdio.h>
 #include <setjmp.h>
+#include <math.h>
 #include <sys/types.h>
 
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
 #endif
@@ -145,6 +151,9 @@
     void *membase;
     RVALUE *slot;
     int limit;
+    RVALUE *slotlimit;
+    int *marks;
+    int marks_size;
 } *heaps;
 static int heaps_length = 0;
 static int heaps_used   = 0;
@@ -322,7 +331,29 @@
 	RUBY_CRITICAL(free(x));
 }
 
+static int debugging = 0;
 
+#define OPTION_ENABLED(name) (getenv((name)) && *getenv((name)) && *getenv((name)) != '0')
+
+static void *
+alloc_ruby_heap(size_t size)
+{
+    return malloc(size);
+}
+
+static void
+free_ruby_heap(void *heap)
+{
+    free(heap);
+}
+
+static void
+init_debugging()
+{
+    debugging = OPTION_ENABLED("RUBY_GC_DEBUG");
+}
+
+
 /*
  *  call-seq:
  *     GC.enable    => true or false
@@ -413,11 +444,108 @@
     }
 }
 
+static struct heaps_slot *last_heap = NULL;
+
+static inline struct heaps_slot *
+find_heap_slot_for_object(RVALUE *object)
+{
+    struct heaps_slot *heap;
+    register int i;
+
+    /* Look in the cache first. */
+    if (last_heap != NULL && object >= last_heap->slot
+	&& object < last_heap->slotlimit) {
+	return last_heap;
+    }
+    /* find heap_slot for object using linear search
+     * (faster than bsearch because there are only a few heaps)
+     */
+    i = heaps_used;
+    while (i) {
+	i--;
+	heap = &heaps[i];
+	if (heap->slot <= object) {
+	    if (object < heap->slotlimit) {
+		/* Cache this result. According to empirical evidence, the chance is
+		 * high that the next lookup will be for the same heap slot.
+		 */
+		last_heap = heap;
+		return heap;
+	    }
+	}
+    }
+    return NULL;
+}
+
+static inline void
+find_position_in_bitfield(struct heaps_slot *hs, RVALUE *object,
+                          unsigned int *bitfield_index, unsigned int *bitfield_offset)
+{
+    unsigned int index;
+
+    index = object - hs->slot;
+    *bitfield_index = index / (sizeof(int) * 8);
+    *bitfield_offset = index % (sizeof(int) * 8);
+}
+
+
 static void
+rb_mark_table_add(RVALUE *object)
+{
+    struct heaps_slot *hs;
+    unsigned int bitfield_index, bitfield_offset;
+
+    hs = find_heap_slot_for_object(object);
+    if (hs != NULL) {
+	find_position_in_bitfield(hs, object, &bitfield_index, &bitfield_offset);
+	hs->marks[bitfield_index] |= (1 << bitfield_offset);
+    }
+}
+
+static inline int
+rb_mark_table_heap_contains(struct heaps_slot *hs, RVALUE *object)
+{
+    unsigned int bitfield_index, bitfield_offset;
+
+    find_position_in_bitfield(hs, object, &bitfield_index, &bitfield_offset);
+    return hs->marks[bitfield_index] & (1 << bitfield_offset);
+}
+
+static inline int
+rb_mark_table_contains(RVALUE *object)
+{
+    struct heaps_slot *hs;
+
+    hs = find_heap_slot_for_object(object);
+    if (hs != NULL) {
+	return rb_mark_table_heap_contains(hs, object);
+    }
+	return 0;
+}
+
+static inline void
+rb_mark_table_heap_remove(struct heaps_slot *hs, RVALUE *object)
+{
+    unsigned int bitfield_index, bitfield_offset;
+    find_position_in_bitfield(hs, object, &bitfield_index, &bitfield_offset);
+    hs->marks[bitfield_index] &= ~(1 << bitfield_offset);
+}
+
+static void
+rb_mark_table_remove(RVALUE *object)
+{
+    struct heaps_slot *hs;
+
+    hs = find_heap_slot_for_object(object);
+    if (hs != NULL) {
+	rb_mark_table_heap_remove(hs, object);
+    }
+}
+
+static void
 add_heap(void)
 {
-    RVALUE *p, *pend, *membase;
-    long hi, lo, mid;
+    RVALUE *p, *pend;
 
     if (heaps_used == heaps_length) {
 	/* Realloc heaps */
@@ -438,45 +565,26 @@
     }
 
     for (;;) {
-	RUBY_CRITICAL(p = (RVALUE*)malloc(sizeof(RVALUE)*(heap_slots+1)));
+	RUBY_CRITICAL(p = (RVALUE*)alloc_ruby_heap(sizeof(RVALUE)*(heap_slots+1)));
 	if (p == 0) {
 	    if (heap_slots == HEAP_MIN_SLOTS) {
 		rb_memerror();
 	    }
 	    heap_slots = HEAP_MIN_SLOTS;
+	    continue;
 	}
-	else {
-	    break;
-	}
+	heaps[heaps_used].membase = p;
+	if ((VALUE)p % sizeof(RVALUE) == 0)
+	    heap_slots += 1;
+	else
+	    p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
+	heaps[heaps_used].slot = p;
+	heaps[heaps_used].limit = heap_slots;
+	heaps[heaps_used].slotlimit = p + heap_slots;
+        heaps[heaps_used].marks_size = (int) (ceil(heap_slots / (sizeof(int) * 8.0)));
+        heaps[heaps_used].marks = (int *) calloc(heaps[heaps_used].marks_size, sizeof(int));
+	break;
     }
-
-    lo = 0;
-    hi = heaps_used;
-    while (lo < hi) {
-	mid = (lo + hi) / 2;
-	membase = heaps[mid].membase;
-	if (membase < p) {
-	    lo = mid + 1;
-	}
-	else if (membase > p) {
-	    hi = mid;
-	}
-	else {
-	    rb_bug("same heap slot is allocated: %p at %ld", p, mid);
-	}
-    }
-
-    membase = p;
-    if ((VALUE)p % sizeof(RVALUE) == 0)
-	heap_slots += 1;
-    else
-	p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
-    if (hi < heaps_used) {
-	MEMMOVE(&heaps[hi+1], &heaps[hi], struct heaps_slot, heaps_used - hi);
-    }
-    heaps[hi].membase = membase;
-    heaps[hi].slot = p;
-    heaps[hi].limit = heap_slots;
     pend = p + heap_slots;
     if (lomem == 0 || lomem > p) lomem = p;
     if (himem < pend) himem = pend;
@@ -508,6 +616,7 @@
     freelist = freelist->as.free.next;
 
     MEMZERO((void*)obj, RVALUE, 1);
+    RANY(obj)->as.free.flags = 0;
 #ifdef GC_DEBUG
     RANY(obj)->file = rb_sourcefile();
     RANY(obj)->line = rb_sourceline();
@@ -710,14 +819,15 @@
 gc_mark_all(void)
 {
     RVALUE *p, *pend;
+    struct heaps_slot *heap;
     int i;
 
     init_mark_stack();
     for (i = 0; i < heaps_used; i++) {
-	p = heaps[i].slot; pend = p + heaps[i].limit;
+	heap = &heaps[i];
+	p = heap->slot; pend = heap->slotlimit;
 	while (p < pend) {
-	    if ((p->as.basic.flags & FL_MARK) &&
-		(p->as.basic.flags != FL_MARK)) {
+	    if (rb_mark_table_heap_contains(heap, p) && (p->as.basic.flags != 0)) {
 		gc_mark_children((VALUE)p, 0);
 	    }
 	    p++;
@@ -751,21 +861,8 @@
     if (p < lomem || p > himem) return Qfalse;
     if ((VALUE)p % sizeof(RVALUE) != 0) return Qfalse;
 
-    /* check if p looks like a pointer using bsearch*/
-    lo = 0;
-    hi = heaps_used;
-    while (lo < hi) {
-	mid = (lo + hi) / 2;
-	heap = &heaps[mid];
-	if (heap->slot <= p) {
-	    if (p < heap->slot + heap->limit)
-		return Qtrue;
-	    lo = mid + 1;
-	}
-	else {
-	    hi = mid;
-	}
-    }
+    if (find_heap_slot_for_object(p))
+	return Qtrue;
     return Qfalse;
 }
 
@@ -871,8 +968,8 @@
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    if (rb_mark_table_contains(obj)) return;  /* already marked */
+    rb_mark_table_add(obj);
 
     if (lev > GC_LEVEL_MAX || (lev == 0 && ruby_stack_check())) {
 	if (!mark_stack_overflow) {
@@ -906,8 +1003,8 @@
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    if (rb_mark_table_contains(obj)) return;  /* already marked */
+    rb_mark_table_add(obj);
 
   marking:
     if (FL_TEST(obj, FL_EXIVAR)) {
@@ -1161,10 +1258,15 @@
     while (p) {
 	RVALUE *tmp = p->as.free.next;
 	run_final((VALUE)p);
-	if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
+	/* Don't free objects that are singletons, or objects that are already freed.
+	 * The latter is to prevent the unnecessary marking of memory pages as dirty,
+	 * which can destroy copy-on-write semantics.
+	 */
+	if (!FL_TEST(p, FL_SINGLETON) && p->as.free.flags != 0) {
             VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
 	    p->as.free.flags = 0;
 	    p->as.free.next = freelist;
+	    rb_mark_table_remove(p);
 	    freelist = p;
 	}
 	p = tmp;
@@ -1178,7 +1280,8 @@
 
     for (i = j = 1; j < heaps_used; i++) {
 	if (heaps[i].limit == 0) {
-	    free(heaps[i].membase);
+	    free_ruby_heap(heaps[i].membase);
+	    free(heaps[i].marks);
 	    heaps_used--;
 	}
 	else {
@@ -1219,32 +1322,38 @@
 	int n = 0;
 	RVALUE *free = freelist;
 	RVALUE *final = final_list;
+	struct heaps_slot *heap = &heaps[i];
 
-	p = heaps[i].slot; pend = p + heaps[i].limit;
+	p = heap->slot; pend = heap->slotlimit;
 	while (p < pend) {
-	    if (!(p->as.basic.flags & FL_MARK)) {
+	    if (!rb_mark_table_heap_contains(heap, p)) {
 		if (p->as.basic.flags) {
 		    obj_free((VALUE)p);
 		}
 		if (need_call_final && FL_TEST(p, FL_FINALIZE)) {
-		    p->as.free.flags = FL_MARK; /* remain marked */
+		    p->as.free.flags = FL_FINALIZE;
 		    p->as.free.next = final_list;
 		    final_list = p;
 		}
 		else {
                     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
-		    p->as.free.flags = 0;
-		    p->as.free.next = freelist;
+		    /* Do not touch the fields if they don't have to be modified.
+		     * This is in order to preserve copy-on-write semantics.
+		     */
+		    if (p->as.free.flags != 0)
+			p->as.free.flags = 0;
+		    if (p->as.free.next != freelist)
+			p->as.free.next = freelist;
 		    freelist = p;
 		}
 		n++;
 	    }
-	    else if (RBASIC(p)->flags == FL_MARK) {
+	    else if (RBASIC(p)->flags == FL_FINALIZE) {
 		/* objects to be finalized */
-		/* do nothing remain marked */
+		/* do nothing here */
 	    }
 	    else {
-		RBASIC(p)->flags &= ~FL_MARK;
+		rb_mark_table_heap_remove(heap, p);
 		live++;
 	    }
 	    p++;
@@ -1286,6 +1395,7 @@
     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
     RANY(p)->as.free.flags = 0;
     RANY(p)->as.free.next = freelist;
+    rb_mark_table_remove((RVALUE *) p);
     freelist = RANY(p);
 }
 
@@ -1476,6 +1586,7 @@
 
     FLUSH_REGISTER_WINDOWS;
     /* This assumes that all registers are saved into the jmp_buf (and stack) */
+    memset(save_regs_gc_mark, 0, sizeof(save_regs_gc_mark));
     setjmp(save_regs_gc_mark);
     mark_locations_array((VALUE*)save_regs_gc_mark,
 			 sizeof(save_regs_gc_mark) / sizeof(VALUE));
@@ -1515,6 +1626,7 @@
 
     SET_STACK_END;
 
+    last_heap = NULL;
     init_mark_stack();
 
     th->vm->self ? rb_gc_mark(th->vm->self) : rb_vm_mark(th->vm);
@@ -1616,6 +1728,18 @@
     rb_gc_stack_maxsize = size;
 }
 
+int
+rb_gc_is_thread_marked(the_thread)
+    VALUE the_thread;
+{
+    if (FL_ABLE(the_thread)) {
+	return rb_mark_table_contains((RVALUE *) the_thread);
+    }
+    else {
+	return 0;
+    }
+}
+
 void
 Init_stack(VALUE *addr)
 {
@@ -1837,12 +1961,9 @@
     VALUE of;
 
     rb_secure(4);
-    if (argc == 0) {
+    if (rb_scan_args(argc, argv, "01", &of) == 0) {
 	of = 0;
     }
-    else {
-	rb_scan_args(argc, argv, "01", &of);
-    }
     RETURN_ENUMERATOR(os, 1, &of);
     return os_obj_of(of);
 }
@@ -2025,6 +2146,7 @@
 rb_gc_call_finalizer_at_exit(void)
 {
     RVALUE *p, *pend;
+    struct heaps_slot *heap;
     int i;
 
     /* finalizers are part of garbage collection */
@@ -2048,12 +2170,14 @@
     }
     /* run data object's finalizers */
     for (i = 0; i < heaps_used; i++) {
-	p = heaps[i].slot; pend = p + heaps[i].limit;
+	heap = &heaps[i];
+	p = heap->slot; pend = heap->slotlimit;
 	while (p < pend) {
 	    if (BUILTIN_TYPE(p) == T_DATA &&
 		DATA_PTR(p) && RANY(p)->as.data.dfree &&
 		RANY(p)->as.basic.klass != rb_cThread) {
 		p->as.free.flags = 0;
+		rb_mark_table_heap_remove(heap, p);
 		if ((long)RANY(p)->as.data.dfree == -1) {
 		    RUBY_CRITICAL(free(DATA_PTR(p)));
 		}
@@ -2065,6 +2189,7 @@
 	    else if (BUILTIN_TYPE(p) == T_FILE) {
 		if (rb_io_fptr_finalize(RANY(p)->as.file.fptr)) {
 		    p->as.free.flags = 0;
+		    rb_mark_table_heap_remove(heap, p);
                     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
 		}
 	    }
@@ -2285,6 +2410,61 @@
     return hash;
 }
 
+static VALUE
+os_statistics()
+{
+    int i;
+    int n = 0;
+    unsigned int objects = 0;
+    unsigned int total_heap_size = 0;
+    unsigned int ast_nodes = 0;
+    char message[1024];
+
+    for (i = 0; i < heaps_used; i++) {
+	RVALUE *p, *pend;
+
+	p = heaps[i].slot;
+	pend = p + heaps[i].limit;
+	for (;p < pend; p++) {
+	    if (p->as.basic.flags) {
+		int isAST = 0;
+		switch (TYPE(p)) {
+		  case T_ICLASS:
+		  case T_NODE:
+		    isAST = 1;
+		    break;
+		  case T_CLASS:
+		    if (FL_TEST(p, FL_SINGLETON)) {
+		        isAST = 1;
+		        break;
+		    }
+		  default:
+		    break;
+		}
+		objects++;
+		if (isAST) {
+		   ast_nodes++;
+		}
+	    }
+	}
+	total_heap_size += (void*)pend - heaps[i].membase;
+    }
+
+    snprintf(message, sizeof(message),
+        "Number of objects: %d (%d AST nodes, %.2f%%)\n"
+        "Heap slot size: %d\n"
+        "Number of heaps: %d\n"
+        "Total size of objects: %.2f KB\n"
+        "Total size of heaps: %.2f KB\n",
+        objects, ast_nodes, ast_nodes * 100 / (double) objects,
+        sizeof(RVALUE),
+        heaps_used,
+        objects * sizeof(RVALUE) / 1024.0,
+        total_heap_size / 1024.0
+    );
+    return rb_str_new2(message);
+}
+
 /*
  *  The <code>GC</code> module provides an interface to Ruby's mark and
  *  sweep garbage collection mechanism. Some of the underlying methods
@@ -2317,6 +2497,8 @@
 
     rb_define_module_function(rb_mObSpace, "_id2ref", id2ref, 1);
 
+    rb_define_module_function(rb_mObSpace, "statistics", os_statistics, 0);
+
     rb_gc_register_address(&rb_mObSpace);
     rb_global_variable(&finalizers);
     rb_gc_unregister_address(&rb_mObSpace);
@@ -2332,4 +2514,6 @@
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
 
     rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1);
+
+    init_debugging();
 }

Thread overview: 31+ messages
2008-03-03  9:48 Copy-on-write friendly garbage collector Hongli Lai
2008-03-03 12:38 ` Daniel DeLorme
2008-03-03 13:11 ` Yukihiro Matsumoto
2008-03-04 11:31   ` Gonzalo Garramuño
2008-03-07 12:04   ` Hongli Lai
2008-03-07 15:20     ` Paul Brannan
2008-03-07 16:22       ` Hongli Lai
2008-03-07 18:47         ` Joel VanderWerf
2008-03-08  5:34   ` Daniel DeLorme
2008-03-08  7:50     ` Daniel DeLorme
2008-03-08 10:01       ` Daniel DeLorme [this message]
2008-03-08 15:39         ` Yukihiro Matsumoto
2008-03-12 17:23       ` Hongli Lai
2008-03-12 17:38         ` Yukihiro Matsumoto
2008-03-13  0:48         ` Daniel DeLorme
2008-03-13 11:04           ` Hongli Lai
2008-03-15 16:15         ` Hongli Lai
2008-03-13 11:18     ` Hongli Lai
2008-03-14  3:20     ` Hongli Lai
2008-03-14  4:44       ` Daniel DeLorme
2008-03-14 11:25         ` Hongli Lai
2008-03-14 12:01           ` Meinrad Recheis
2008-03-14 15:00           ` Daniel Berger
2008-03-14 15:53             ` Hongli Lai
2008-03-14 17:34               ` Joel VanderWerf
2008-03-20 10:59     ` Hongli Lai
2008-03-20 11:55       ` Yukihiro Matsumoto
2008-03-20 12:54         ` Hongli Lai
2008-03-20 14:24           ` Yukihiro Matsumoto
2008-03-20 14:45             ` Hongli Lai
2008-03-20 15:28             ` Joel VanderWerf
