From: "vo.x (Vit Ondruch) via ruby-core" <ruby-core@ml.ruby-lang.org>
To: ruby-core@ml.ruby-lang.org
Cc: "vo.x (Vit Ondruch)" <noreply@ruby-lang.org>
Subject: [ruby-core:116403] [Ruby master Bug#20203] `TestEnumerable` test failures with GCC 14
Date: Wed, 24 Jan 2024 11:07:37 +0000 (UTC) [thread overview]
Message-ID: <redmine.journal-106424.20240124110737.703@ruby-lang.org> (raw)
In-Reply-To: redmine.issue-20203.20240122171042.703@ruby-lang.org
Issue #20203 has been updated by vo.x (Vit Ondruch).
So there are more insights from the glibc developers, and it seems the issue is that "Ruby uses qsort_r in an undefined way". Let me quote @fweimer from [RH bugzilla](https://bugzilla.redhat.com/show_bug.cgi?id=2259845#c15):
~~~
In current rawhide glibc (glibc-2.38.9000-33.fc40.x86_64), a buffer allocated with malloc is used for the qsort scratch buffer. This is actually a glibc bug because the array is very short and we should use an on-stack buffer.
I need to confirm the details yet, but I think what happens is that the Ruby garbage collector runs during the sort_by callback. I suspect the collector writes to the array, which is quite undefined (“The comparison function shall not alter the contents of the array.” says the C standard). This causes problems subsequently when we copy back previous array contents from the scratch buffer. With a stack-based buffer, the collector pins objects, so the issue is not visible. Sorry, this is all very speculative, but I don't want you to spend more time chasing this.
I can reproduce the crash in Fedora 38 (with upstream Ruby sources) if I increase the size of the array being sorted so that qsort_r uses a malloc-based buffer there as well:
diff --git a/test/ruby/test_enum.rb b/test/ruby/test_enum.rb
index f7c8f012d8..23e18cc590 100644
--- a/test/ruby/test_enum.rb
+++ b/test/ruby/test_enum.rb
@@ -871,7 +871,9 @@ class << o; self; end.class_eval do
0
end
end
- [o, o, o].sort_by {|x| x }
+ l = []
+ (1..100).each {|x| l += [o] }
+ l.sort_by {|x| x }
c.call
end
The whole thing is probably quite sensitive to allocation patterns etc., so I have no idea how reliable this is as a trigger for the bug.
~~~
and [followup](https://bugzilla.redhat.com/show_bug.cgi?id=2259845#c18)
~~~
With this instrumentation patch applied to glibc:
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 7f5a00fb33..c5263d9f5f 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -25,6 +25,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
+#include <assert.h>
/* Swap SIZE bytes between addresses A and B. These helpers are provided
along the generic one as an optimization. */
@@ -338,9 +339,9 @@ indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
}
}
-void
-__qsort_r (void *const pbase, size_t total_elems, size_t size,
- __compar_d_fn_t cmp, void *arg)
+static void
+__qsort_r_real (void *const pbase, size_t total_elems, size_t size,
+ __compar_d_fn_t cmp, void *arg)
{
if (total_elems <= 1)
return;
@@ -396,6 +397,43 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
if (buf != tmp)
free (buf);
}
+
+struct qsort_r_data
+{
+ __compar_d_fn_t cmp;
+ void *arg;
+ void *array;
+ size_t size;
+ void *copy;
+};
+
+static int
+qsort_compare_wrapper (const void *a, const void *b, void *data1)
+{
+ struct qsort_r_data *data = data1;
+ memcpy (data->copy, data->array, data->size);
+ int ret = data->cmp (a, b, data->arg);
+ assert (memcmp (data->array, data->copy, data->size) == 0);
+ return ret;
+}
+
+void
+__qsort_r (void *pbase, size_t total_elems, size_t size,
+ __compar_d_fn_t cmp, void *arg)
+{
+ struct qsort_r_data data =
+ {
+ .cmp = cmp,
+ .arg = arg,
+ .array = pbase,
+ .size = total_elems * size,
+ };
+ data.copy = malloc (data.size);
+ assert (data.copy != NULL);
+ __qsort_r_real (pbase, total_elems, size, qsort_compare_wrapper, &data);
+ free (data.copy);
+}
+
libc_hidden_def (__qsort_r)
weak_alias (__qsort_r, qsort_r)
And using the Fedora rawhide glibc variant with the heap allocation and the unchanged Ruby test case, I get:
[54/83] TestEnumerable#test_callccFatal glibc error: qsort.c:416 (qsort_compare_wrapper): assertion failed: memcmp (data->array, data->copy, data->size) == 0
Thread 1 "ruby" received signal SIGABRT, Aborted.
__pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6,
no_tid=no_tid@entry=0) at pthread_kill.c:44
44 return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO (ret) : 0;
(gdb) bt
#0 __pthread_kill_implementation (threadid=<optimized out>,
signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
#1 0x00007ffff7c57423 in __pthread_kill_internal (signo=6,
threadid=<optimized out>) at pthread_kill.c:78
#2 0x00007ffff7c0493e in __GI_raise (sig=sig@entry=6)
at ../sysdeps/posix/raise.c:26
#3 0x00007ffff7bec8ff in __GI_abort () at abort.c:79
#4 0x00007ffff7bed7d5 in __libc_message_impl (
fmt=fmt@entry=0x7ffff7d6cba0 "Fatal glibc error: %s:%s (%s): assertion failed: %s\n") at ../sysdeps/posix/libc_fatal.c:132
#5 0x00007ffff7bfcaa9 in __libc_assert_fail (
assertion=assertion@entry=0x7ffff7d6cd70 "memcmp (data->array, data->copy, data->size) == 0", file=file@entry=0x7ffff7d67d51 "qsort.c",
line=line@entry=416,
function=function@entry=0x7ffff7d71390 <__PRETTY_FUNCTION__.1> "qsort_compare_wrapper") at __libc_assert_fail.c:31
#6 0x00007ffff7c0873c in qsort_compare_wrapper (a=a@entry=0x7fffdc852fe0,
b=b@entry=0x7fffdc852ff0, data1=data1@entry=0x7fffffffd520) at qsort.c:416
#7 0x00007ffff7c08923 in msort_with_tmp (p=p@entry=0x7fffffffd0a0,
b=b@entry=0x7fffdc852fe0, n=n@entry=2) at qsort.c:276
#8 0x00007ffff7c08ced in msort_with_tmp (n=2, b=0x7fffdc852fe0,
p=0x7fffffffd0a0) at qsort.c:202
#9 __qsort_r_real (pbase=pbase@entry=0x7fffdc852fe0,
total_elems=total_elems@entry=2, size=size@entry=16,
arg=arg@entry=0x7fffffffd520, cmp=0x7ffff7c086c0 <qsort_compare_wrapper>)
at qsort.c:394
#10 0x00007ffff7c09140 in __GI___qsort_r (pbase=0x7fffdc852fe0, total_elems=2,
size=size@entry=16, cmp=cmp@entry=0x5555559709a0 <sort_by_cmp>,
arg=arg@entry=0x7fffdc852fd0) at qsort.c:433
#11 0x000055555596f3ad in enum_sort_by (obj=<optimized out>) at enum.c:1691
I think that's pretty good evidence that ruby uses qsort_r in an undefined way
~~~
----------------------------------------
Bug #20203: `TestEnumerable` test failures with GCC 14
https://bugs.ruby-lang.org/issues/20203#change-106424
* Author: vo.x (Vit Ondruch)
* Status: Open
* Priority: Normal
* ruby -v: ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [x86_64-linux]
* Backport: 3.0: UNKNOWN, 3.1: UNKNOWN, 3.2: UNKNOWN, 3.3: UNKNOWN
----------------------------------------
There is an ongoing mass rebuild in Fedora, which is the first time GCC 14 is being used, and we observe test failures in `TestEnumerable`. Here are a few examples:
~~~
[ 3000/26419] TestEnumerable#test_transient_heap_sort_bymalloc_consolidate(): unaligned fastbin chunk detected
~~~
~~~
[ 2455/26535] TestEnumerable#test_transient_heap_sort_bycorrupted size vs. prev_size in fastbins
~~~
~~~
[ 9716/26532] TestEnumerable#test_any_with_unused_blockdouble free or corruption (fasttop)
~~~
The full logs are accessible [here](https://koji.fedoraproject.org/koji/taskinfo?taskID=112176941). Please drill through `Descendants` and `build.log`.
--
https://bugs.ruby-lang.org/
______________________________________________
ruby-core mailing list -- ruby-core@ml.ruby-lang.org
To unsubscribe send an email to ruby-core-leave@ml.ruby-lang.org
ruby-core info -- https://ml.ruby-lang.org/mailman3/postorius/lists/ruby-core.ml.ruby-lang.org/
next prev parent reply other threads:[~2024-01-24 11:07 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-22 17:10 [ruby-core:116370] [Ruby master Bug#20203] `TestEnumerable` test failures with GCC 14 vo.x (Vit Ondruch) via ruby-core
2024-01-23 11:45 ` [ruby-core:116375] " vo.x (Vit Ondruch) via ruby-core
2024-01-23 11:57 ` [ruby-core:116376] " vo.x (Vit Ondruch) via ruby-core
2024-01-23 13:42 ` [ruby-core:116378] " vo.x (Vit Ondruch) via ruby-core
2024-01-23 16:53 ` [ruby-core:116383] " vo.x (Vit Ondruch) via ruby-core
2024-01-24 11:07 ` vo.x (Vit Ondruch) via ruby-core [this message]
2024-01-24 11:27 ` [ruby-core:116407] " fweimer (Florian Weimer) via ruby-core
2024-01-25 0:18 ` [ruby-core:116434] " alanwu (Alan Wu) via ruby-core
2024-02-01 16:21 ` [ruby-core:116548] " alanwu (Alan Wu) via ruby-core
2024-02-14 5:30 ` [ruby-core:116732] " ko1 (Koichi Sasada) via ruby-core
2024-03-14 9:40 ` [ruby-core:117148] " mame (Yusuke Endoh) via ruby-core
2024-03-20 18:32 ` [ruby-core:117266] " alanwu (Alan Wu) via ruby-core
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-list from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.ruby-lang.org/en/community/mailing-lists/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=redmine.journal-106424.20240124110737.703@ruby-lang.org \
--to=ruby-core@ruby-lang.org \
--cc=noreply@ruby-lang.org \
--cc=ruby-core@ml.ruby-lang.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).