unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Fix the inaccuracy of j0f (BZ 14469).
@ 2021-01-18  7:13 Paul Zimmermann
  2021-01-18 20:26 ` Joseph Myers
  0 siblings, 1 reply; 3+ messages in thread
From: Paul Zimmermann @ 2021-01-18  7:13 UTC (permalink / raw)
  To: libc-alpha

The largest error for all binary32 inputs is now less than 9.5 ulps with
respect to the infinite precision result, i.e., less than 9 ulps after
rounding, which is the largest error allowed. (This is for rounding to
nearest, for other rounding modes, the new code should not behave worse than
the old one.)

The new code is enabled only when there is a cancellation at the very end of
the j0f computation, thus should not give any visible slowdown on average.
When there is a cancellation, two different algorithms are used:

* around the first 64 zeros of j0, approximation polynomials of degree 3 are
  used, which were computed using the Sollya tool (https://www.sollya.org/)

* for large inputs, an asymptotic formula is used from [1]

[1] Fast and Accurate Bessel Function Computation,
John Harrison, Proceedings of Arith 19, 2009.
---
 math/auto-libm-test-in            |   3 +-
 math/auto-libm-test-out-j0        |  25 +++
 sysdeps/ieee754/flt-32/e_j0f.c    | 341 +++++++++++++++++++++++++++++-
 sysdeps/x86_64/fpu/libm-test-ulps |  14 +-
 4 files changed, 369 insertions(+), 14 deletions(-)

diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 73840b8bef..dbf1dca35c 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -5756,8 +5756,9 @@ j0 -0x1.001000001p+593
 j0 0x1p1023
 j0 0x1p16382
 j0 0x1p16383
-# the next value generates larger error bounds on x86_64 (binary32)
+# the next values generate large error bounds on x86_64 (binary32)
 j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
+j0 0x1.8bde7ep+5
 # the next value exercises the flt-32 code path for x >= 2^127
 j0 0x8.2f4ecp+124
 
diff --git a/math/auto-libm-test-out-j0 b/math/auto-libm-test-out-j0
index cc1fea074e..d659f47632 100644
--- a/math/auto-libm-test-out-j0
+++ b/math/auto-libm-test-out-j0
@@ -1359,6 +1359,31 @@ j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
 = j0 tonearest ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : inexact-ok
 = j0 towardzero ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
 = j0 upward ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab8p-8 : xfail:ibm128-libgcc inexact-ok
+j0 0x1.8bde7ep+5
+= j0 downward binary32 0x3.17bcfcp+4 : 0x7.a5f028p-16 : inexact-ok
+= j0 tonearest binary32 0x3.17bcfcp+4 : 0x7.a5f03p-16 : inexact-ok
+= j0 towardzero binary32 0x3.17bcfcp+4 : 0x7.a5f028p-16 : inexact-ok
+= j0 upward binary32 0x3.17bcfcp+4 : 0x7.a5f03p-16 : inexact-ok
+= j0 downward binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 tonearest binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 towardzero binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 upward binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b78p-16 : inexact-ok
+= j0 downward intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 tonearest intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 towardzero intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 upward intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 downward m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 tonearest m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 towardzero m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 upward m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 downward binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 tonearest binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 towardzero binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 upward binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f16cp-16 : inexact-ok
+= j0 downward ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935fp-16 : inexact-ok
+= j0 tonearest ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f2p-16 : inexact-ok
+= j0 towardzero ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935fp-16 : inexact-ok
+= j0 upward ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f2p-16 : inexact-ok
 j0 0x8.2f4ecp+124
 = j0 downward binary32 0x8.2f4ecp+124 : 0xd.331efp-68 : inexact-ok
 = j0 tonearest binary32 0x8.2f4ecp+124 : 0xd.331efp-68 : inexact-ok
diff --git a/sysdeps/ieee754/flt-32/e_j0f.c b/sysdeps/ieee754/flt-32/e_j0f.c
index 5d29611eb7..f6efab828c 100644
--- a/sysdeps/ieee754/flt-32/e_j0f.c
+++ b/sysdeps/ieee754/flt-32/e_j0f.c
@@ -37,6 +37,330 @@ S04  =  1.1661400734e-09; /* 0x30a045e8 */
 
 static const float zero = 0.0;
 
+#define FIRST_ZERO 2.404825f
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of j0 and degree-3 polynomial
+   approximations of j0 around these zeros: P[0] for the first zero (2.404825), P[1]
+   for the second one (5.520078), and so one. Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.
+   Degree 3 is enough to get an error less than 4 ulps.
+*/
+static float P[SMALL_SIZE][7] = {
+{0x2.63cb8cp+0,0x2.67a2a4p+0,0x2.6f5d28p+0,0xf.26247p-28,-0x8.4e6d9p-4,0x1.ba1c7p-4,0xe.6c06ap-8},
+/* the following polynomial was generated by Sollya */
+{0x5.83abe8p+0,0x5.8523d8p+0,0x5.8a557p+0,0x6.9205fp-28,0x5.71b98p-4,-0x7.e3e798p-8,-0xd.87d1p-8},
+{0x8.a66f1p+0,0x8.a75abp+0,0x8.a92a7p+0,0x1.bcc1cap-24,-0x4.57de6p-4,0x4.03debp-8,0xb.44a4ap-8},
+{0xb.c9905p+0,0xb.caa2p+0,0xb.ccc6bp+0,-0xf.2976fp-32,0x3.b827ccp-4,-0x2.85fdb8p-8,-0x9.c5a97p-8},
+{0xe.edb6ap+0,0xe.ee50ap+0,0xe.ef864p+0,-0x1.bd67d8p-28,-0x3.4e03ap-4,0x1.c54b8ep-8,0x8.bb70dp-8},
+{0x1.2119fp+4,0x1.212314p+4,0x1.21375p+4,0x1.62209cp-28,0x3.00efecp-4,-0x1.5467fp-8,-0x7.f5a2p-8},
+{0x1.535d28p+4,0x1.5362dep+4,0x1.536cbcp+4,-0x2.853f74p-24,-0x2.c5b274p-4,0x1.0bab0ap-8,0x7.5c05b8p-8},
+{0x1.859df6p+4,0x1.85a3bap+4,0x1.85afcap+4,0x2.19ed1cp-24,0x2.96545cp-4,-0xd.995c9p-12,-0x6.e024cp-8},
+{0x1.b7dfe6p+4,0x1.b7e54ap+4,0x1.b7ebccp+4,0xe.959aep-28,-0x2.6f5594p-4,0xb.55ff1p-12,0x6.79cf58p-8},
+{0x1.ea22bcp+4,0x1.ea275ap+4,0x1.ea2e2ep+4,0x2.0c3964p-24,0x2.4e80fcp-4,-0x9.a35abp-12,-0x6.234b08p-8},
+{0x2.1c6638p+4,0x2.1c69c4p+4,0x2.1c6d7cp+4,-0x3.642554p-24,-0x2.325e48p-4,0x8.534f1p-12,0x5.d9048p-8},
+{0x2.4ea8dcp+4,0x2.4eac7p+4,0x2.4eb39cp+4,0x1.6c015ap-24,0x2.19e7d8p-4,-0x7.4915f8p-12,-0x5.984698p-8},
+{0x2.80ec2p+4,0x2.80ef5p+4,0x2.80f72cp+4,-0x4.b18c9p-28,-0x2.046174p-4,0x6.720468p-12,0x5.5f426p-8},
+{0x2.b32e6p+4,0x2.b33258p+4,0x2.b33654p+4,-0x1.8b8792p-24,0x1.f13fbp-4,-0x5.c149dp-12,-0x5.2c935p-8},
+{0x2.e5736p+4,0x2.e5758p+4,0x2.e57894p+4,0x3.a26e0cp-24,-0x1.e018dap-4,0x5.2df918p-12,0x4.ff0f68p-8},
+{0x3.17b694p+4,0x3.17b8c4p+4,0x3.17bcecp+4,-0x2.18fabcp-24,0x1.d09b22p-4,-0x4.b1c31p-12,-0x4.d5ecd8p-8},
+{0x3.49f9d8p+4,0x3.49fc1cp+4,0x3.4a0084p+4,0x3.2370e8p-24,-0x1.c28614p-4,0x4.47bb78p-12,0x4.b08458p-8},
+{0x3.7c3d78p+4,0x3.7c3f88p+4,0x3.7c43ep+4,-0x5.9eae3p-28,0x1.b5a622p-4,-0x3.ec892cp-12,-0x4.8e4d3p-8},
+{0x3.ae80ap+4,0x3.ae83p+4,0x3.ae8528p+4,0x2.9fa1e8p-24,-0x1.a9d184p-4,0x3.9d2fa8p-12,0x4.6edccp-8},
+{0x3.e0c484p+4,0x3.e0c688p+4,0x3.e0c8a4p+4,0x9.9ac67p-28,0x1.9ee5eep-4,-0x3.57e9ep-12,-0x4.51d1e8p-8},
+{0x4.1308f8p+4,0x4.130a18p+4,0x4.130c58p+4,0xd.6ab94p-28,-0x1.94c6f6p-4,0x3.1ac03p-12,0x4.36e4bp-8},
+{0x4.454cp+4,0x4.454dbp+4,0x4.45504p+4,-0x4.4cb2d8p-24,0x1.8b5cccp-4,-0x2.e477ap-12,-0x4.1dd858p-8},
+{0x4.778f6p+4,0x4.779158p+4,0x4.779368p+4,-0x4.4aa8c8p-24,-0x1.829356p-4,0x2.b4726cp-12,0x4.0676dp-8},
+{0x4.a9d3ep+4,0x4.a9d5p+4,0x4.a9d6cp+4,0x2.077c38p-24,0x1.7a597ep-4,-0x2.891dbcp-12,-0x3.f09154p-8},
+{0x4.dc175p+4,0x4.dc18bp+4,0x4.dc1a08p+4,-0x2.6a6cd8p-24,-0x1.72a09ap-4,0x2.62315cp-12,0x3.dc034p-8},
+{0x5.0e5bb8p+4,0x5.0e5c6p+4,0x5.0e5dbp+4,-0x5.08287p-24,0x1.6b5c06p-4,-0x2.3ec48p-12,-0x3.c8a91cp-8},
+{0x5.409ebp+4,0x5.40a02p+4,0x5.40a188p+4,-0x3.4598dcp-24,-0x1.6480c4p-4,0x2.1f1798p-12,0x3.b667ccp-8},
+{0x5.72e268p+4,0x5.72e3ep+4,0x5.72e54p+4,0x5.4e74bp-24,0x1.5e0544p-4,-0x2.021254p-12,-0x3.a5248cp-8},
+{0x5.a5263p+4,0x5.a527ap+4,0x5.a528d8p+4,-0x2.05751cp-24,-0x1.57e12p-4,0x1.e7643ep-12,0x3.94c994p-8},
+{0x5.d76acp+4,0x5.d76b68p+4,0x5.d76ccp+4,0x4.c5e278p-24,0x1.520ceep-4,-0x1.cf1d4ep-12,-0x3.85428cp-8},
+{0x6.09ae88p+4,0x6.09af3p+4,0x6.09b0bp+4,-0x3.50e62cp-24,-0x1.4c822p-4,0x1.b8ab9ap-12,0x3.767f34p-8},
+{0x6.3bf21p+4,0x6.3bf2f8p+4,0x6.3bf418p+4,-0x1.c39f38p-24,0x1.473ae6p-4,-0x1.a3dccep-12,-0x3.68700cp-8},
+{0x6.6e362p+4,0x6.6e36c8p+4,0x6.6e3818p+4,-0x1.9245b6p-28,-0x1.42320ap-4,0x1.90d5f2p-12,0x3.5b0634p-8},
+{0x6.a079dp+4,0x6.a07a98p+4,0x6.a07b78p+4,-0x1.0acf8p-24,0x1.3d62e6p-4,-0x1.7f1e42p-12,-0x3.4e3678p-8},
+{0x6.d2bda8p+4,0x6.d2be68p+4,0x6.d2bfb8p+4,0x4.cd92d8p-24,-0x1.38c94ap-4,0x1.6e94e2p-12,0x3.41f4acp-8},
+{0x7.05018p+4,0x7.05024p+4,0x7.0503p+4,-0x1.37bf8ap-24,0x1.34617p-4,-0x1.5f6a22p-12,-0x3.3637c8p-8},
+{0x7.37459p+4,0x7.374618p+4,0x7.3747ap+4,-0x1.8f62dep-28,-0x1.3027fp-4,0x1.51357ap-12,0x3.2af594p-8},
+{0x7.69892p+4,0x7.6989fp+4,0x7.698b98p+4,-0x9.81e79p-28,0x1.2c19b4p-4,-0x1.43e0aep-12,-0x3.20271p-8},
+{0x7.9bccf8p+4,0x7.9bcdc8p+4,0x7.9bceap+4,0x3.103b3p-24,-0x1.2833eep-4,0x1.37580ep-12,0x3.15c484p-8},
+{0x7.ce10dp+4,0x7.ce11a8p+4,0x7.ce136p+4,0x2.07b058p-24,0x1.24740ap-4,-0x1.2bd334p-12,-0x3.0bc628p-8},
+{0x8.0054cp+4,0x8.00558p+4,0x8.00562p+4,0x3.87576cp-24,-0x1.20d7b6p-4,0x1.20af6cp-12,0x3.022738p-8},
+{0x8.32994p+4,0x8.32996p+4,0x8.329a4p+4,-0x1.691ecp-24,0x1.1d5ccap-4,-0x1.167022p-12,-0x2.f8e084p-8},
+{0x8.64dcdp+4,0x8.64dd4p+4,0x8.64dd9p+4,0x9.b406dp-28,-0x1.1a015p-4,0x1.0cbfd2p-12,0x2.efee34p-8},
+{0x8.97209p+4,0x8.97212p+4,0x8.9721bp+4,-0xf.bfd8fp-28,0x1.16c37ap-4,-0x1.039356p-12,-0x2.e74a78p-8},
+{0x8.c9649p+4,0x8.c965p+4,0x8.c965bp+4,0x2.6d50c8p-24,-0x1.13a19ep-4,0xf.ae0ap-16,0x2.def13p-8},
+{0x8.fba89p+4,0x8.fba8ep+4,0x8.fba9dp+4,-0x4.d475c8p-24,0x1.109a32p-4,-0xf.29e9cp-16,-0x2.d6de4cp-8},
+{0x9.2dec7p+4,0x9.2deccp+4,0x9.2dedp+4,0x8.1982p-24,-0x1.0dabc8p-4,0xe.ac514p-16,0x2.cf0e6p-8},
+{0x9.60306p+4,0x9.6030bp+4,0x9.60318p+4,0x4.864518p-24,0x1.0ad51p-4,-0xe.3d1fbp-16,-0x2.c77d28p-8},
+{0x9.92743p+4,0x9.92749p+4,0x9.9274ep+4,0x6.8917a8p-28,-0x1.0814d4p-4,0xd.cb25ap-16,0x2.c0283p-8},
+{0x9.c4b81p+4,0x9.c4b87p+4,0x9.c4b8ep+4,-0x5.fa18fp-24,0x1.0569fp-4,-0xd.5e6d8p-16,-0x2.b90bep-8},
+{0x9.f6fc2p+4,0x9.f6fc6p+4,0x9.f6fcep+4,-0x4.0e5c98p-24,-0x1.02d354p-4,0xc.feb37p-16,0x2.b2259p-8},
+{0xa.293f8p+4,0xa.29404p+4,0xa.29408p+4,-0x2.c3ddbp-24,0x1.005004p-4,-0xc.9b641p-16,-0x2.ab72fp-8},
+{0xa.5b83bp+4,0xa.5b843p+4,0xa.5b852p+4,-0x5.d052p-24,-0xf.ddf16p-8,0xc.444dcp-16,0x2.a4f0d4p-8},
+{0xa.8dc7ap+4,0xa.8dc81p+4,0xa.8dc87p+4,-0x2.0b97dcp-24,0xf.b7fafp-8,-0xb.e93a7p-16,-0x2.9e9dccp-8},
+{0xa.c00b4p+4,0xa.c00cp+4,0xa.c00c4p+4,-0x5.4aab5p-24,-0xf.930fep-8,0xb.99b61p-16,0x2.98774p-8},
+{0xa.f24f5p+4,0xa.f24fep+4,0xa.f2509p+4,-0x3.6dadd8p-24,0xf.6f245p-8,-0xb.45e12p-16,-0x2.927b08p-8},
+{0xb.24939p+4,0xb.2493dp+4,0xb.24948p+4,-0x2.d7e038p-24,-0xf.4c2cep-8,0xa.fd076p-16,0x2.8ca788p-8},
+{0xb.56d73p+4,0xb.56d7bp+4,0xb.56d82p+4,-0x6.977a1p-24,0xf.2a1fp-8,-0xa.af99ap-16,-0x2.86fb2p-8},
+{0xb.891b3p+4,0xb.891bap+4,0xb.891c7p+4,0x1.3cc95ep-24,-0xf.08f0ap-8,0xa.6ca59p-16,0x2.8173b8p-8},
+{0xb.bb5f5p+4,0xb.bb5f9p+4,0xb.bb5fdp+4,0x3.a4921p-24,0xe.e8986p-8,-0xa.2c5b8p-16,-0x2.7c1024p-8},
+{0xb.eda37p+4,0xb.eda37p+4,0xb.eda3bp+4,0x6.b45a7p-24,-0xe.c90d8p-8,0x9.e7307p-16,0x2.76ceacp-8},
+{0xc.1fe71p+4,0xc.1fe76p+4,0xc.1fe87p+4,-0x2.8f34a4p-24,0xe.aa478p-8,-0x9.abd8fp-16,-0x2.71adecp-8},
+{0xc.522aep+4,0xc.522b5p+4,0xc.522c4p+4,-0x1.325968p-24,-0xe.8c3eap-8,0x9.72bf7p-16,0x2.6cacd4p-8},
+{0xc.846eep+4,0xc.846f4p+4,0xc.846fap+4,0x4.96b808p-24,0xe.6eeb5p-8,-0x9.3bc53p-16,-0x2.67ca04p-8},
+};
+
+/* argument reduction mod 2pi: T[i] ~ 2^i mod (2*pi) */
+static double T[128] = {
+0x1p+0,
+0x2p+0,
+-0x2.487ed5110b462p+0,
+0x1.b7812aeef4b9fp+0,
+-0x2.d97c7f3321d24p+0,
+0x9.585d6aac7a1a8p-4,
+0x1.2b0bad558f435p+0,
+0x2.56175aab1e86ap+0,
+-0x1.9c501fbace38dp+0,
+0x3.0fde959b6ed46p+0,
+-0x2.8c1a9da2d9d3cp-4,
+-0x5.18353b45b3a78p-4,
+-0xa.306a768b674fp-4,
+-0x1.460d4ed16ce9ep+0,
+-0x2.8c1a9da2d9d3cp+0,
+0x1.304999cb579e8p+0,
+0x2.60933396af3dp+0,
+-0x1.87586de3accc3p+0,
+-0x3.0eb0dbc759986p+0,
+0x2.b1d1d8258155p-4,
+0x5.63a3b04b02aap-4,
+0xa.c74760960554p-4,
+0x1.58e8ec12c0aa8p+0,
+0x2.b1d1d8258155p+0,
+-0xe.4db24c6089bf8p-4,
+-0x1.c9b6498c1137fp+0,
+0x2.b51241f8e8d64p+0,
+-0xd.e5a511f3999bp-4,
+-0x1.bcb4a23e73336p+0,
+0x2.cf15909424df4p+0,
+-0xa.a53b3e8c1877p-4,
+-0x1.54a767d1830eep+0,
+-0x2.a94ecfa3061dcp+0,
+0xf.5e135caff0a78p-4,
+0x1.ebc26b95fe14fp+0,
+-0x2.70f9fde50f1c4p+0,
+0x1.668ad946ed0dap+0,
+0x2.cd15b28dda1b4p+0,
+-0xa.e536ff5570fbp-4,
+-0x1.5ca6dfeaae1f6p+0,
+-0x2.b94dbfd55c3ecp+0,
+0xd.5e3556652c89p-4,
+0x1.abc6aacca5912p+0,
+-0x2.f0f17f77c023ep+0,
+0x6.69bd6218afe64p-4,
+0xc.d37ac4315fcc8p-4,
+0x1.9a6f58862bf99p+0,
+-0x3.13a02404b352ep+0,
+0x2.13e8d07a4a046p-4,
+0x4.27d1a0f49408cp-4,
+0x8.4fa341e928118p-4,
+0x1.09f4683d25023p+0,
+0x2.13e8d07a4a046p+0,
+-0x2.20ad341c773d4p+0,
+0x2.07246cd81ccb8p+0,
+-0x2.3a35fb60d1af2p+0,
+0x1.d412de4f67e7ep+0,
+-0x2.a05918723b764p+0,
+0x1.07cca42c94599p+0,
+0x2.0f99485928b32p+0,
+-0x2.294c445eb9dfep+0,
+0x1.f5e64c5397865p+0,
+-0x2.5cb23c69dc396p+0,
+0x1.8f1a5c3d52d34p+0,
+0x3.1e34b87aa5a68p+0,
+-0xc.15641bbff90bp-8,
+-0x1.82ac8377ff216p-4,
+-0x3.055906effe42cp-4,
+-0x6.0ab20ddffc858p-4,
+-0xc.15641bbff90bp-4,
+-0x1.82ac8377ff216p+0,
+-0x3.055906effe42cp+0,
+0x3.dccc7310ec09ap-4,
+0x7.b998e621d8134p-4,
+0xf.7331cc43b0268p-4,
+0x1.ee6639887604dp+0,
+-0x2.6bb262001f3c6p+0,
+0x1.711a1110cccd4p+0,
+0x2.e2342221999a8p+0,
+-0x8.41690cdd81128p-4,
+-0x1.082d219bb0225p+0,
+-0x2.105a43376044ap+0,
+0x2.27ca4ea24abcep+0,
+-0x1.f8ea37cc75cc6p+0,
+0x2.56aa65781fad6p+0,
+-0x1.9b2a0a20cbeb6p+0,
+0x3.122ac0cf736f6p+0,
+-0x2.4295372246764p-4,
+-0x4.852a6e448cec8p-4,
+-0x9.0a54dc8919d9p-4,
+-0x1.214a9b91233b2p+0,
+-0x2.4295372246764p+0,
+0x1.c35466cc7e59ap+0,
+-0x2.c1d607780e92ep+0,
+0xc.4d2c620ee206p-4,
+0x1.89a58c41dc40cp+0,
+0x3.134b1883b8818p+0,
+-0x2.1e8a4099a4324p-4,
+-0x4.3d14813348648p-4,
+-0x8.7a29026690c9p-4,
+-0x1.0f45204cd2192p+0,
+-0x2.1e8a4099a4324p+0,
+0x2.0b6a53ddc2e18p+0,
+-0x2.31aa2d558583p+0,
+0x1.e52a7a6600402p+0,
+-0x2.7e29e0450ac5cp+0,
+0x1.4c2b1486f5ba8p+0,
+0x2.9856290deb75p+0,
+-0x1.17d282f5345bfp+0,
+-0x2.2fa505ea68b7ep+0,
+0x1.e934c93c39d64p+0,
+-0x2.761542989799ap+0,
+0x1.5c544fdfdc12fp+0,
+0x2.b8a89fbfb825ep+0,
+-0xd.72d95919afa58p-4,
+-0x1.ae5b2b2335f4bp+0,
+0x2.ebc87eca9f5cap+0,
+-0x7.0edd77bcc8ccp-4,
+-0xe.1dbaef799198p-4,
+-0x1.c3b75def3233p+0,
+0x2.c1101932a6e02p+0,
+-0xc.65ea2abbd85fp-4,
+-0x1.8cbd45577b0bep+0,
+-0x3.197a8aaef617cp+0,
+0x1.589bfb31f1687p-4,
+0x2.b137f663e2d0ep-4,
+0x5.626fecc7c5a1cp-4,
+0xa.c4dfd98f8b438p-4,
+};
+
+/* return h such that x - pi/4 mod (2*pi) ~ h */
+static double
+reduce_mod_twopi (double x)
+{
+  double sign = 1, h;
+  if (x < 0)
+    {
+      x = -x;
+      sign = -1;
+    }
+  h = 0; /* invariant is x+h mod (2*pi) */
+  while (x >= 4)
+    {
+      int i = ilogb (x);
+      x -= ldexp (1.0f, i);
+      h += T[i];
+    }
+  /* add the remainder x */
+  h += x;
+  /* subtract pi/4 */
+  double piover4 = 0xc.90fdaa22168cp-4;
+  h -= piover4;
+  /* reduce mod 2*pi */
+  double twopi = 0x6.487ed5110b46p+0;
+  while (fabs (h) > twopi * 0.5)
+    {
+      if (h > 0)
+        h -= twopi;
+      else
+        h += twopi;
+    }
+  return sign * h;
+}
+
+/* formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf */
+static float
+j0f_asympt (float x)
+{
+  /* the following code fails to give an error <= 9 ulps in only one case,
+     for which we tabulate the result */
+  if (x == 0x1.4665d2p+24f)
+    return 0xa.50206p-52f;
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  /* beta0 = 1 - 16/x^2 + 53/(512*x^4) */
+  double beta0 = 1.0f + y2 * (-0x1p-4f + 0x1.a8p-4 * y2);
+  /* alpha0 = 1/(8*x) - 25/(384*x^3) */
+  double alpha0 = y * (0x2p-4 - 0x1.0aaaaap-4 * y2);
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  /* subtract alpha0 */
+  h -= alpha0;
+  /* now reduce mod pi/2 */
+  double piover2 = 0x1.921fb54442d18p+0;
+  int n = 0;
+  while (fabs (h) > piover2 / 2)
+    {
+      if (h > 0)
+        {
+          h -= piover2;
+          n ++;
+        }
+      else
+        {
+          h += piover2;
+          n --;
+        }
+    }
+  /* now x - pi/4 - alpha0 = h + n*pi/2 mod (2*pi) */
+  float xr = (float) h;
+  n = n & 3;
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  float t = cst / sqrtf (x) * (float) beta0;
+  if (n == 0)
+    return t * cosf (xr);
+  else if (n == 2) /* cos(x+pi) = -cos(x) */
+    return -t * cosf (xr);
+  else if (n == 1) /* cos(x+pi/2) = -sin(x) */
+    return -t * sinf (xr);
+  else             /* cos(x+3pi/2) = sin(x) */
+    return t * sinf (xr);
+}
+
+/* Special code for x near a root of j0.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating j0 around its root.
+   For large x, we use an asymptotic formula (j0f_asympt). */
+static float
+j0f_near_root (float x, float z)
+{
+  float index_f;
+  int index;
+  index_f = roundf ((x - FIRST_ZERO) / (float) M_PI);
+  /* j0f_asympt fails to give an error <= 9 ulps for x=0x1.324e92p+7 (index 48)
+     thus we can't reduce SMALL_SIZE below 49 */
+  if (index_f >= SMALL_SIZE)
+    return j0f_asympt (x);
+  index = (int) index_f;
+  float *p = P[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* if we are not in the interval [x0,x1] around xmid, we return the value z */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  return p[3] + y * (p[4] + y * (p[5] + y * p[6]));
+}
+
 float
 __ieee754_j0f(float x)
 {
@@ -75,12 +399,17 @@ __ieee754_j0f(float x)
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
 	 */
-		if(ix>0x5c000000) z = (invsqrtpi*cc)/sqrtf(x);
-		else {
-		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*cc-v*ss)/sqrtf(x);
-		}
-		return z;
+                if (ix <= 0x5c000000)
+                  {
+                    u = pzerof(x); v = qzerof(x);
+                    cc = u*cc-v*ss;
+                  }
+                z = (invsqrtpi * cc) / sqrtf(x);
+                float magic = 0x1.8p+20; /* 2^20 + 2^19 */
+                if (magic + cc != magic) /* most likely */
+                  return z;
+                else /* |cc| <= 2^-4 */
+                  return j0f_near_root (x, z);
 	}
 	if(ix<0x39000000) {	/* |x| < 2**-13 */
 	    math_force_eval(huge+x);		/* raise inexact if x != 0 */
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 633d2ab8e4..75deb1462d 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1312,22 +1312,22 @@ float128: 1
 ldouble: 1
 
 Function: "j0":
-double: 2
-float: 8
-float128: 2
+double: 5
+float: 9
+float128: 4
 ldouble: 2
 
 Function: "j0_downward":
 double: 2
-float: 4
+float: 9
 float128: 4
 ldouble: 6
 
 Function: "j0_towardzero":
-double: 5
-float: 6
+double: 9
+float: 7
 float128: 4
-ldouble: 6
+ldouble: 9
 
 Function: "j0_upward":
 double: 4
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix the inaccuracy of j0f (BZ 14469).
  2021-01-18  7:13 [PATCH] Fix the inaccuracy of j0f (BZ 14469) Paul Zimmermann
@ 2021-01-18 20:26 ` Joseph Myers
  2021-01-19 10:28   ` [PATCH] Fix the inaccuracy of j0f (BZ 14469) and y0f (BZ 14471) Paul Zimmermann
  0 siblings, 1 reply; 3+ messages in thread
From: Joseph Myers @ 2021-01-18 20:26 UTC (permalink / raw)
  To: Paul Zimmermann; +Cc: libc-alpha

On Mon, 18 Jan 2021, Paul Zimmermann wrote:

> +static float P[SMALL_SIZE][7] = {
> +{0x2.63cb8cp+0,0x2.67a2a4p+0,0x2.6f5d28p+0,0xf.26247p-28,-0x8.4e6d9p-4,0x1.ba1c7p-4,0xe.6c06ap-8},

These lines should have appropriate indentation and spacing.  Spaces after 
commas and '{', before '}', two-space indent for the line.

> +/* the following polynomial was generated by Sollya */

Comments should start with a capital letter and end with '.' and two 
spaces.

Likewise elsewhere in this patch and the y0f one.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] Fix the inaccuracy of j0f (BZ 14469) and y0f (BZ 14471)
  2021-01-18 20:26 ` Joseph Myers
@ 2021-01-19 10:28   ` Paul Zimmermann
  0 siblings, 0 replies; 3+ messages in thread
From: Paul Zimmermann @ 2021-01-19 10:28 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-alpha

thank you Joseph for your review. Here is an updated combined patch.

Best regards,
Paul

For both j0f and y0f, the largest error for all binary32 inputs is now less
then 9.5 ulps with respect to the infinite precision result, i.e., less than
9 ulps after rounding, which is the largest error allowed.  (This is for
rounding to nearest; for other rounding modes, the new code should not behave
worse than the old one.)

The new code is enabled only when there is a cancellation at the very end of
the j0f/y0f computation, or for very large inputs, thus should not give any
visible slowdown on average. Two different algorithms are used:

* around the first 64 zeros of j0/y0, approximation polynomials of degree 3
  are used, computed using the Sollya tool (https://www.sollya.org/)

* for large inputs, an asymptotic formula from [1] is used

[1] Fast and Accurate Bessel Function Computation,
John Harrison, Proceedings of Arith 19, 2009.

The largest error is now obtained for the following inputs respectively:

libm wrong by up to 9.50e+00 ulp(s) for x=0x1.8bde7ep+5
j0f     gives 0x1.e97c1ep-14
mpfr_j0 gives 0x1.e97c0cp-14

libm wrong by up to 9.49e+00 ulp(s) for x=0x1.3d4e56p+6
y0f     gives 0x1.b42876p-16
mpfr_y0 gives 0x1.b42864p-16
---
 math/auto-libm-test-in            |   8 +-
 math/auto-libm-test-out-j0        |  50 +--
 math/auto-libm-test-out-y0        |  50 +--
 sysdeps/ieee754/flt-32/e_j0f.c    | 564 +++++++++++++++++++++++++++---
 sysdeps/x86_64/fpu/libm-test-ulps |  28 +-
 5 files changed, 592 insertions(+), 108 deletions(-)

diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 73840b8bef..2c7afe64ee 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -5756,8 +5756,8 @@ j0 -0x1.001000001p+593
 j0 0x1p1023
 j0 0x1p16382
 j0 0x1p16383
-# the next value generates larger error bounds on x86_64 (binary32)
-j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
+# the next value generates large error bounds on x86_64 (binary32)
+j0 0x1.8bde7ep+5
 # the next value exercises the flt-32 code path for x >= 2^127
 j0 0x8.2f4ecp+124
 
@@ -8225,8 +8225,8 @@ y0 0x1p-100
 y0 0x1p-110
 y0 0x1p-600
 y0 0x1p-10000
-# the next value generates larger error bounds on x86_64 (binary32)
-y0 0xd.3432bp-4
+# the next value generates large error bounds on x86_64 (binary32)
+y0 0x1.3d4e56p+6
 y0 min
 y0 min_subnorm
 
diff --git a/math/auto-libm-test-out-j0 b/math/auto-libm-test-out-j0
index cc1fea074e..32b9685de3 100644
--- a/math/auto-libm-test-out-j0
+++ b/math/auto-libm-test-out-j0
@@ -1334,31 +1334,31 @@ j0 0x1p16383
 = j0 tonearest ibm128 0xf.ffffffffffffbffffffffffffcp+1020 : -0xb.a80d0ee91ce259a722e1f01904p-516 : inexact-ok
 = j0 towardzero ibm128 0xf.ffffffffffffbffffffffffffcp+1020 : -0xb.a80d0ee91ce259a722e1f019p-516 : inexact-ok
 = j0 upward ibm128 0xf.ffffffffffffbffffffffffffcp+1020 : -0xb.a80d0ee91ce259a722e1f019p-516 : inexact-ok
-j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
-= j0 downward binary32 0x2.602774p+0 : 0x3.e83778p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest binary32 0x2.602774p+0 : 0x3.e83778p-8 : inexact-ok
-= j0 towardzero binary32 0x2.602774p+0 : 0x3.e83778p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward binary32 0x2.602774p+0 : 0x3.e8377cp-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward binary64 0x2.602774p+0 : 0x3.e83779fe1991p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest binary64 0x2.602774p+0 : 0x3.e83779fe19912p-8 : inexact-ok
-= j0 towardzero binary64 0x2.602774p+0 : 0x3.e83779fe1991p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward binary64 0x2.602774p+0 : 0x3.e83779fe19912p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward intel96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest intel96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : inexact-ok
-= j0 towardzero intel96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward intel96 0x2.602774p+0 : 0x3.e83779fe19911facp-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward m68k96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest m68k96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : inexact-ok
-= j0 towardzero m68k96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward m68k96 0x2.602774p+0 : 0x3.e83779fe19911facp-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : inexact-ok
-= j0 towardzero binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab702p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : inexact-ok
-= j0 towardzero ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab8p-8 : xfail:ibm128-libgcc inexact-ok
+j0 0x1.8bde7ep+5
+= j0 downward binary32 0x3.17bcfcp+4 : 0x7.a5f028p-16 : inexact-ok
+= j0 tonearest binary32 0x3.17bcfcp+4 : 0x7.a5f03p-16 : inexact-ok
+= j0 towardzero binary32 0x3.17bcfcp+4 : 0x7.a5f028p-16 : inexact-ok
+= j0 upward binary32 0x3.17bcfcp+4 : 0x7.a5f03p-16 : inexact-ok
+= j0 downward binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 tonearest binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 towardzero binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 upward binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b78p-16 : inexact-ok
+= j0 downward intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 tonearest intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 towardzero intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 upward intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 downward m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 tonearest m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 towardzero m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 upward m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 downward binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 tonearest binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 towardzero binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 upward binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f16cp-16 : inexact-ok
+= j0 downward ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935fp-16 : inexact-ok
+= j0 tonearest ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f2p-16 : inexact-ok
+= j0 towardzero ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935fp-16 : inexact-ok
+= j0 upward ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f2p-16 : inexact-ok
 j0 0x8.2f4ecp+124
 = j0 downward binary32 0x8.2f4ecp+124 : 0xd.331efp-68 : inexact-ok
 = j0 tonearest binary32 0x8.2f4ecp+124 : 0xd.331efp-68 : inexact-ok
diff --git a/math/auto-libm-test-out-y0 b/math/auto-libm-test-out-y0
index 8ebb585170..8148a56933 100644
--- a/math/auto-libm-test-out-y0
+++ b/math/auto-libm-test-out-y0
@@ -795,31 +795,31 @@ y0 0x1p-10000
 = y0 tonearest binary128 0x1p-10000 : -0x1.13cc92aab9d385d1d0f2693cb631p+12 : inexact-ok
 = y0 towardzero binary128 0x1p-10000 : -0x1.13cc92aab9d385d1d0f2693cb631p+12 : inexact-ok
 = y0 upward binary128 0x1p-10000 : -0x1.13cc92aab9d385d1d0f2693cb631p+12 : inexact-ok
-y0 0xd.3432bp-4
-= y0 downward binary32 0xd.3432bp-4 : -0xf.fdd88p-8 : inexact-ok
-= y0 tonearest binary32 0xd.3432bp-4 : -0xf.fdd87p-8 : inexact-ok
-= y0 towardzero binary32 0xd.3432bp-4 : -0xf.fdd87p-8 : inexact-ok
-= y0 upward binary32 0xd.3432bp-4 : -0xf.fdd87p-8 : inexact-ok
-= y0 downward binary64 0xd.3432bp-4 : -0xf.fdd871793bc78p-8 : inexact-ok
-= y0 tonearest binary64 0xd.3432bp-4 : -0xf.fdd871793bc7p-8 : inexact-ok
-= y0 towardzero binary64 0xd.3432bp-4 : -0xf.fdd871793bc7p-8 : inexact-ok
-= y0 upward binary64 0xd.3432bp-4 : -0xf.fdd871793bc7p-8 : inexact-ok
-= y0 downward intel96 0xd.3432bp-4 : -0xf.fdd871793bc71fap-8 : inexact-ok
-= y0 tonearest intel96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 towardzero intel96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 upward intel96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 downward m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71fap-8 : inexact-ok
-= y0 tonearest m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 towardzero m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 upward m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 downward binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44118p-8 : inexact-ok
-= y0 tonearest binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44118p-8 : inexact-ok
-= y0 towardzero binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b4411p-8 : inexact-ok
-= y0 upward binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b4411p-8 : inexact-ok
-= y0 downward ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b444p-8 : inexact-ok
-= y0 tonearest ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
-= y0 towardzero ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
-= y0 upward ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
+y0 0x1.3d4e56p+6
+= y0 downward binary32 0x4.f53958p+4 : 0x1.b42862p-16 : inexact-ok
+= y0 tonearest binary32 0x4.f53958p+4 : 0x1.b42864p-16 : inexact-ok
+= y0 towardzero binary32 0x4.f53958p+4 : 0x1.b42862p-16 : inexact-ok
+= y0 upward binary32 0x4.f53958p+4 : 0x1.b42864p-16 : inexact-ok
+= y0 downward binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 tonearest binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 towardzero binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 upward binary64 0x4.f53958p+4 : 0x1.b428630651a18p-16 : inexact-ok
+= y0 downward intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 tonearest intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 towardzero intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 upward intel96 0x4.f53958p+4 : 0x1.b428630651a175aep-16 : inexact-ok
+= y0 downward m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 tonearest m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 towardzero m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 upward m68k96 0x4.f53958p+4 : 0x1.b428630651a175aep-16 : inexact-ok
+= y0 downward binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eeap-16 : inexact-ok
+= y0 tonearest binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eebp-16 : inexact-ok
+= y0 towardzero binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eeap-16 : inexact-ok
+= y0 upward binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eebp-16 : inexact-ok
+= y0 downward ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104e8p-16 : inexact-ok
+= y0 tonearest ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104fp-16 : inexact-ok
+= y0 towardzero ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104e8p-16 : inexact-ok
+= y0 upward ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104fp-16 : inexact-ok
 y0 min
 = y0 downward binary32 0x4p-128 : -0x3.7ac89cp+4 : inexact-ok
 = y0 tonearest binary32 0x4p-128 : -0x3.7ac89cp+4 : inexact-ok
diff --git a/sysdeps/ieee754/flt-32/e_j0f.c b/sysdeps/ieee754/flt-32/e_j0f.c
index 5d29611eb7..92da9e5642 100644
--- a/sysdeps/ieee754/flt-32/e_j0f.c
+++ b/sysdeps/ieee754/flt-32/e_j0f.c
@@ -37,6 +37,330 @@ S04  =  1.1661400734e-09; /* 0x30a045e8 */
 
 static const float zero = 0.0;
 
+#define FIRST_ZERO_J0 2.404825f
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of j0 and degree-3 polynomial
+   approximations of j0 around these zeros: Pj[0] for the first zero (2.404825),
+   Pj[1] for the second one (5.520078), and so on.  Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.  Degree 3 is enough to get an error less than 4 ulps.
+*/
+static float Pj[SMALL_SIZE][7] = {
+  { 0x2.63cb8cp+0, 0x2.67a2a4p+0, 0x2.6f5d28p+0, 0xf.26247p-28, -0x8.4e6d9p-4, 0x1.ba1c7p-4, 0xe.6c06ap-8 },
+/* The following polynomial was generated by Sollya. */
+  { 0x5.83abe8p+0, 0x5.8523d8p+0, 0x5.8a557p+0, 0x6.9205fp-28, 0x5.71b98p-4, -0x7.e3e798p-8, -0xd.87d1p-8 },
+  { 0x8.a66f1p+0, 0x8.a75abp+0, 0x8.a92a7p+0, 0x1.bcc1cap-24, -0x4.57de6p-4, 0x4.03debp-8, 0xb.44a4ap-8 },
+  { 0xb.c9905p+0, 0xb.caa2p+0, 0xb.ccc6bp+0, -0xf.2976fp-32, 0x3.b827ccp-4, -0x2.85fdb8p-8, -0x9.c5a97p-8 },
+  { 0xe.edb6ap+0, 0xe.ee50ap+0, 0xe.ef864p+0, -0x1.bd67d8p-28, -0x3.4e03ap-4, 0x1.c54b8ep-8, 0x8.bb70dp-8 },
+  { 0x1.2119fp+4, 0x1.212314p+4, 0x1.21375p+4, 0x1.62209cp-28, 0x3.00efecp-4, -0x1.5467fp-8, -0x7.f5a2p-8 },
+  { 0x1.535d28p+4, 0x1.5362dep+4, 0x1.536cbcp+4, -0x2.853f74p-24, -0x2.c5b274p-4, 0x1.0bab0ap-8, 0x7.5c05b8p-8 },
+  { 0x1.859df6p+4, 0x1.85a3bap+4, 0x1.85afcap+4, 0x2.19ed1cp-24, 0x2.96545cp-4, -0xd.995c9p-12, -0x6.e024cp-8 },
+  { 0x1.b7dfe6p+4, 0x1.b7e54ap+4, 0x1.b7ebccp+4, 0xe.959aep-28, -0x2.6f5594p-4, 0xb.55ff1p-12, 0x6.79cf58p-8 },
+  { 0x1.ea22bcp+4, 0x1.ea275ap+4, 0x1.ea2e2ep+4, 0x2.0c3964p-24, 0x2.4e80fcp-4, -0x9.a35abp-12, -0x6.234b08p-8 },
+  { 0x2.1c6638p+4, 0x2.1c69c4p+4, 0x2.1c6d7cp+4, -0x3.642554p-24, -0x2.325e48p-4, 0x8.534f1p-12, 0x5.d9048p-8 },
+  { 0x2.4ea8dcp+4, 0x2.4eac7p+4, 0x2.4eb39cp+4, 0x1.6c015ap-24, 0x2.19e7d8p-4, -0x7.4915f8p-12, -0x5.984698p-8 },
+  { 0x2.80ec2p+4, 0x2.80ef5p+4, 0x2.80f72cp+4, -0x4.b18c9p-28, -0x2.046174p-4, 0x6.720468p-12, 0x5.5f426p-8 },
+  { 0x2.b32e6p+4, 0x2.b33258p+4, 0x2.b33654p+4, -0x1.8b8792p-24, 0x1.f13fbp-4, -0x5.c149dp-12, -0x5.2c935p-8 },
+  { 0x2.e5736p+4, 0x2.e5758p+4, 0x2.e57894p+4, 0x3.a26e0cp-24, -0x1.e018dap-4, 0x5.2df918p-12, 0x4.ff0f68p-8 },
+  { 0x3.17b694p+4, 0x3.17b8c4p+4, 0x3.17bcecp+4, -0x2.18fabcp-24, 0x1.d09b22p-4, -0x4.b1c31p-12, -0x4.d5ecd8p-8 },
+  { 0x3.49f9d8p+4, 0x3.49fc1cp+4, 0x3.4a0084p+4, 0x3.2370e8p-24, -0x1.c28614p-4, 0x4.47bb78p-12, 0x4.b08458p-8 },
+  { 0x3.7c3d78p+4, 0x3.7c3f88p+4, 0x3.7c43ep+4, -0x5.9eae3p-28, 0x1.b5a622p-4, -0x3.ec892cp-12, -0x4.8e4d3p-8 },
+  { 0x3.ae80ap+4, 0x3.ae83p+4, 0x3.ae8528p+4, 0x2.9fa1e8p-24, -0x1.a9d184p-4, 0x3.9d2fa8p-12, 0x4.6edccp-8 },
+  { 0x3.e0c484p+4, 0x3.e0c688p+4, 0x3.e0c8a4p+4, 0x9.9ac67p-28, 0x1.9ee5eep-4, -0x3.57e9ep-12, -0x4.51d1e8p-8 },
+  { 0x4.1308f8p+4, 0x4.130a18p+4, 0x4.130c58p+4, 0xd.6ab94p-28, -0x1.94c6f6p-4, 0x3.1ac03p-12, 0x4.36e4bp-8 },
+  { 0x4.454cp+4, 0x4.454dbp+4, 0x4.45504p+4, -0x4.4cb2d8p-24, 0x1.8b5cccp-4, -0x2.e477ap-12, -0x4.1dd858p-8 },
+  { 0x4.778f6p+4, 0x4.779158p+4, 0x4.779368p+4, -0x4.4aa8c8p-24, -0x1.829356p-4, 0x2.b4726cp-12, 0x4.0676dp-8 },
+  { 0x4.a9d3ep+4, 0x4.a9d5p+4, 0x4.a9d6cp+4, 0x2.077c38p-24, 0x1.7a597ep-4, -0x2.891dbcp-12, -0x3.f09154p-8 },
+  { 0x4.dc175p+4, 0x4.dc18bp+4, 0x4.dc1a08p+4, -0x2.6a6cd8p-24, -0x1.72a09ap-4, 0x2.62315cp-12, 0x3.dc034p-8 },
+  { 0x5.0e5bb8p+4, 0x5.0e5c6p+4, 0x5.0e5dbp+4, -0x5.08287p-24, 0x1.6b5c06p-4, -0x2.3ec48p-12, -0x3.c8a91cp-8 },
+  { 0x5.409ebp+4, 0x5.40a02p+4, 0x5.40a188p+4, -0x3.4598dcp-24, -0x1.6480c4p-4, 0x2.1f1798p-12, 0x3.b667ccp-8 },
+  { 0x5.72e268p+4, 0x5.72e3ep+4, 0x5.72e54p+4, 0x5.4e74bp-24, 0x1.5e0544p-4, -0x2.021254p-12, -0x3.a5248cp-8 },
+  { 0x5.a5263p+4, 0x5.a527ap+4, 0x5.a528d8p+4, -0x2.05751cp-24, -0x1.57e12p-4, 0x1.e7643ep-12, 0x3.94c994p-8 },
+  { 0x5.d76acp+4, 0x5.d76b68p+4, 0x5.d76ccp+4, 0x4.c5e278p-24, 0x1.520ceep-4, -0x1.cf1d4ep-12, -0x3.85428cp-8 },
+  { 0x6.09ae88p+4, 0x6.09af3p+4, 0x6.09b0bp+4, -0x3.50e62cp-24, -0x1.4c822p-4, 0x1.b8ab9ap-12, 0x3.767f34p-8 },
+  { 0x6.3bf21p+4, 0x6.3bf2f8p+4, 0x6.3bf418p+4, -0x1.c39f38p-24, 0x1.473ae6p-4, -0x1.a3dccep-12, -0x3.68700cp-8 },
+  { 0x6.6e362p+4, 0x6.6e36c8p+4, 0x6.6e3818p+4, -0x1.9245b6p-28, -0x1.42320ap-4, 0x1.90d5f2p-12, 0x3.5b0634p-8 },
+  { 0x6.a079dp+4, 0x6.a07a98p+4, 0x6.a07b78p+4, -0x1.0acf8p-24, 0x1.3d62e6p-4, -0x1.7f1e42p-12, -0x3.4e3678p-8 },
+  { 0x6.d2bda8p+4, 0x6.d2be68p+4, 0x6.d2bfb8p+4, 0x4.cd92d8p-24, -0x1.38c94ap-4, 0x1.6e94e2p-12, 0x3.41f4acp-8 },
+  { 0x7.05018p+4, 0x7.05024p+4, 0x7.0503p+4, -0x1.37bf8ap-24, 0x1.34617p-4, -0x1.5f6a22p-12, -0x3.3637c8p-8 },
+  { 0x7.37459p+4, 0x7.374618p+4, 0x7.3747ap+4, -0x1.8f62dep-28, -0x1.3027fp-4, 0x1.51357ap-12, 0x3.2af594p-8 },
+  { 0x7.69892p+4, 0x7.6989fp+4, 0x7.698b98p+4, -0x9.81e79p-28, 0x1.2c19b4p-4, -0x1.43e0aep-12, -0x3.20271p-8 },
+  { 0x7.9bccf8p+4, 0x7.9bcdc8p+4, 0x7.9bceap+4, 0x3.103b3p-24, -0x1.2833eep-4, 0x1.37580ep-12, 0x3.15c484p-8 },
+  { 0x7.ce10dp+4, 0x7.ce11a8p+4, 0x7.ce136p+4, 0x2.07b058p-24, 0x1.24740ap-4, -0x1.2bd334p-12, -0x3.0bc628p-8 },
+  { 0x8.0054cp+4, 0x8.00558p+4, 0x8.00562p+4, 0x3.87576cp-24, -0x1.20d7b6p-4, 0x1.20af6cp-12, 0x3.022738p-8 },
+  { 0x8.32994p+4, 0x8.32996p+4, 0x8.329a4p+4, -0x1.691ecp-24, 0x1.1d5ccap-4, -0x1.167022p-12, -0x2.f8e084p-8 },
+  { 0x8.64dcdp+4, 0x8.64dd4p+4, 0x8.64dd9p+4, 0x9.b406dp-28, -0x1.1a015p-4, 0x1.0cbfd2p-12, 0x2.efee34p-8 },
+  { 0x8.97209p+4, 0x8.97212p+4, 0x8.9721bp+4, -0xf.bfd8fp-28, 0x1.16c37ap-4, -0x1.039356p-12, -0x2.e74a78p-8 },
+  { 0x8.c9649p+4, 0x8.c965p+4, 0x8.c965bp+4, 0x2.6d50c8p-24, -0x1.13a19ep-4, 0xf.ae0ap-16, 0x2.def13p-8 },
+  { 0x8.fba89p+4, 0x8.fba8ep+4, 0x8.fba9dp+4, -0x4.d475c8p-24, 0x1.109a32p-4, -0xf.29e9cp-16, -0x2.d6de4cp-8 },
+  { 0x9.2dec7p+4, 0x9.2deccp+4, 0x9.2dedp+4, 0x8.1982p-24, -0x1.0dabc8p-4, 0xe.ac514p-16, 0x2.cf0e6p-8 },
+  { 0x9.60306p+4, 0x9.6030bp+4, 0x9.60318p+4, 0x4.864518p-24, 0x1.0ad51p-4, -0xe.3d1fbp-16, -0x2.c77d28p-8 },
+  { 0x9.92743p+4, 0x9.92749p+4, 0x9.9274ep+4, 0x6.8917a8p-28, -0x1.0814d4p-4, 0xd.cb25ap-16, 0x2.c0283p-8 },
+  { 0x9.c4b81p+4, 0x9.c4b87p+4, 0x9.c4b8ep+4, -0x5.fa18fp-24, 0x1.0569fp-4, -0xd.5e6d8p-16, -0x2.b90bep-8 },
+  { 0x9.f6fc2p+4, 0x9.f6fc6p+4, 0x9.f6fcep+4, -0x4.0e5c98p-24, -0x1.02d354p-4, 0xc.feb37p-16, 0x2.b2259p-8 },
+  { 0xa.293f8p+4, 0xa.29404p+4, 0xa.29408p+4, -0x2.c3ddbp-24, 0x1.005004p-4, -0xc.9b641p-16, -0x2.ab72fp-8 },
+  { 0xa.5b83bp+4, 0xa.5b843p+4, 0xa.5b852p+4, -0x5.d052p-24, -0xf.ddf16p-8, 0xc.444dcp-16, 0x2.a4f0d4p-8 },
+  { 0xa.8dc7ap+4, 0xa.8dc81p+4, 0xa.8dc87p+4, -0x2.0b97dcp-24, 0xf.b7fafp-8, -0xb.e93a7p-16, -0x2.9e9dccp-8 },
+  { 0xa.c00b4p+4, 0xa.c00cp+4, 0xa.c00c4p+4, -0x5.4aab5p-24, -0xf.930fep-8, 0xb.99b61p-16, 0x2.98774p-8 },
+  { 0xa.f24f5p+4, 0xa.f24fep+4, 0xa.f2509p+4, -0x3.6dadd8p-24, 0xf.6f245p-8, -0xb.45e12p-16, -0x2.927b08p-8 },
+  { 0xb.24939p+4, 0xb.2493dp+4, 0xb.24948p+4, -0x2.d7e038p-24, -0xf.4c2cep-8, 0xa.fd076p-16, 0x2.8ca788p-8 },
+  { 0xb.56d73p+4, 0xb.56d7bp+4, 0xb.56d82p+4, -0x6.977a1p-24, 0xf.2a1fp-8, -0xa.af99ap-16, -0x2.86fb2p-8 },
+  { 0xb.891b3p+4, 0xb.891bap+4, 0xb.891c7p+4, 0x1.3cc95ep-24, -0xf.08f0ap-8, 0xa.6ca59p-16, 0x2.8173b8p-8 },
+  { 0xb.bb5f5p+4, 0xb.bb5f9p+4, 0xb.bb5fdp+4, 0x3.a4921p-24, 0xe.e8986p-8, -0xa.2c5b8p-16, -0x2.7c1024p-8 },
+  { 0xb.eda37p+4, 0xb.eda37p+4, 0xb.eda3bp+4, 0x6.b45a7p-24, -0xe.c90d8p-8, 0x9.e7307p-16, 0x2.76ceacp-8 },
+  { 0xc.1fe71p+4, 0xc.1fe76p+4, 0xc.1fe87p+4, -0x2.8f34a4p-24, 0xe.aa478p-8, -0x9.abd8fp-16, -0x2.71adecp-8 },
+  { 0xc.522aep+4, 0xc.522b5p+4, 0xc.522c4p+4, -0x1.325968p-24, -0xe.8c3eap-8, 0x9.72bf7p-16, 0x2.6cacd4p-8 },
+  { 0xc.846eep+4, 0xc.846f4p+4, 0xc.846fap+4, 0x4.96b808p-24, 0xe.6eeb5p-8, -0x9.3bc53p-16, -0x2.67ca04p-8 }
+};
+
+/* Argument reduction mod 2pi: T[i] ~ 2^i mod (2*pi). */
+static double T[128] = {
+  0x1p+0,
+  0x2p+0,
+  -0x2.487ed5110b462p+0,
+  0x1.b7812aeef4b9fp+0,
+  -0x2.d97c7f3321d24p+0,
+  0x9.585d6aac7a1a8p-4,
+  0x1.2b0bad558f435p+0,
+  0x2.56175aab1e86ap+0,
+  -0x1.9c501fbace38dp+0,
+  0x3.0fde959b6ed46p+0,
+  -0x2.8c1a9da2d9d3cp-4,
+  -0x5.18353b45b3a78p-4,
+  -0xa.306a768b674fp-4,
+  -0x1.460d4ed16ce9ep+0,
+  -0x2.8c1a9da2d9d3cp+0,
+  0x1.304999cb579e8p+0,
+  0x2.60933396af3dp+0,
+  -0x1.87586de3accc3p+0,
+  -0x3.0eb0dbc759986p+0,
+  0x2.b1d1d8258155p-4,
+  0x5.63a3b04b02aap-4,
+  0xa.c74760960554p-4,
+  0x1.58e8ec12c0aa8p+0,
+  0x2.b1d1d8258155p+0,
+  -0xe.4db24c6089bf8p-4,
+  -0x1.c9b6498c1137fp+0,
+  0x2.b51241f8e8d64p+0,
+  -0xd.e5a511f3999bp-4,
+  -0x1.bcb4a23e73336p+0,
+  0x2.cf15909424df4p+0,
+  -0xa.a53b3e8c1877p-4,
+  -0x1.54a767d1830eep+0,
+  -0x2.a94ecfa3061dcp+0,
+  0xf.5e135caff0a78p-4,
+  0x1.ebc26b95fe14fp+0,
+  -0x2.70f9fde50f1c4p+0,
+  0x1.668ad946ed0dap+0,
+  0x2.cd15b28dda1b4p+0,
+  -0xa.e536ff5570fbp-4,
+  -0x1.5ca6dfeaae1f6p+0,
+  -0x2.b94dbfd55c3ecp+0,
+  0xd.5e3556652c89p-4,
+  0x1.abc6aacca5912p+0,
+  -0x2.f0f17f77c023ep+0,
+  0x6.69bd6218afe64p-4,
+  0xc.d37ac4315fcc8p-4,
+  0x1.9a6f58862bf99p+0,
+  -0x3.13a02404b352ep+0,
+  0x2.13e8d07a4a046p-4,
+  0x4.27d1a0f49408cp-4,
+  0x8.4fa341e928118p-4,
+  0x1.09f4683d25023p+0,
+  0x2.13e8d07a4a046p+0,
+  -0x2.20ad341c773d4p+0,
+  0x2.07246cd81ccb8p+0,
+  -0x2.3a35fb60d1af2p+0,
+  0x1.d412de4f67e7ep+0,
+  -0x2.a05918723b764p+0,
+  0x1.07cca42c94599p+0,
+  0x2.0f99485928b32p+0,
+  -0x2.294c445eb9dfep+0,
+  0x1.f5e64c5397865p+0,
+  -0x2.5cb23c69dc396p+0,
+  0x1.8f1a5c3d52d34p+0,
+  0x3.1e34b87aa5a68p+0,
+  -0xc.15641bbff90bp-8,
+  -0x1.82ac8377ff216p-4,
+  -0x3.055906effe42cp-4,
+  -0x6.0ab20ddffc858p-4,
+  -0xc.15641bbff90bp-4,
+  -0x1.82ac8377ff216p+0,
+  -0x3.055906effe42cp+0,
+  0x3.dccc7310ec09ap-4,
+  0x7.b998e621d8134p-4,
+  0xf.7331cc43b0268p-4,
+  0x1.ee6639887604dp+0,
+  -0x2.6bb262001f3c6p+0,
+  0x1.711a1110cccd4p+0,
+  0x2.e2342221999a8p+0,
+  -0x8.41690cdd81128p-4,
+  -0x1.082d219bb0225p+0,
+  -0x2.105a43376044ap+0,
+  0x2.27ca4ea24abcep+0,
+  -0x1.f8ea37cc75cc6p+0,
+  0x2.56aa65781fad6p+0,
+  -0x1.9b2a0a20cbeb6p+0,
+  0x3.122ac0cf736f6p+0,
+  -0x2.4295372246764p-4,
+  -0x4.852a6e448cec8p-4,
+  -0x9.0a54dc8919d9p-4,
+  -0x1.214a9b91233b2p+0,
+  -0x2.4295372246764p+0,
+  0x1.c35466cc7e59ap+0,
+  -0x2.c1d607780e92ep+0,
+  0xc.4d2c620ee206p-4,
+  0x1.89a58c41dc40cp+0,
+  0x3.134b1883b8818p+0,
+  -0x2.1e8a4099a4324p-4,
+  -0x4.3d14813348648p-4,
+  -0x8.7a29026690c9p-4,
+  -0x1.0f45204cd2192p+0,
+  -0x2.1e8a4099a4324p+0,
+  0x2.0b6a53ddc2e18p+0,
+  -0x2.31aa2d558583p+0,
+  0x1.e52a7a6600402p+0,
+  -0x2.7e29e0450ac5cp+0,
+  0x1.4c2b1486f5ba8p+0,
+  0x2.9856290deb75p+0,
+  -0x1.17d282f5345bfp+0,
+  -0x2.2fa505ea68b7ep+0,
+  0x1.e934c93c39d64p+0,
+  -0x2.761542989799ap+0,
+  0x1.5c544fdfdc12fp+0,
+  0x2.b8a89fbfb825ep+0,
+  -0xd.72d95919afa58p-4,
+  -0x1.ae5b2b2335f4bp+0,
+  0x2.ebc87eca9f5cap+0,
+  -0x7.0edd77bcc8ccp-4,
+  -0xe.1dbaef799198p-4,
+  -0x1.c3b75def3233p+0,
+  0x2.c1101932a6e02p+0,
+  -0xc.65ea2abbd85fp-4,
+  -0x1.8cbd45577b0bep+0,
+  -0x3.197a8aaef617cp+0,
+  0x1.589bfb31f1687p-4,
+  0x2.b137f663e2d0ep-4,
+  0x5.626fecc7c5a1cp-4,
+  0xa.c4dfd98f8b438p-4
+};
+
+/* Return h such that x - pi/4 mod (2*pi) ~ h. */
+static double
+reduce_mod_twopi (double x)
+{
+  double sign = 1, h;
+  if (x < 0)
+    {
+      x = -x;
+      sign = -1;
+    }
+  h = 0; /* Invariant is x+h mod (2*pi). */
+  while (x >= 4)
+    {
+      int i = ilogb (x);
+      x -= ldexp (1.0f, i);
+      h += T[i];
+    }
+  /* Add the remainder x. */
+  h += x;
+  /* Subtract pi/4. */
+  double piover4 = 0xc.90fdaa22168cp-4;
+  h -= piover4;
+  /* Reduce mod 2*pi. */
+  double twopi = 0x6.487ed5110b46p+0;
+  while (fabs (h) > twopi * 0.5)
+    {
+      if (h > 0)
+        h -= twopi;
+      else
+        h += twopi;
+    }
+  return sign * h;
+}
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+   j0f(x) ~ sqrt(2/(pi*x))*beta0(x)*cos(x-pi/4-alpha0(x))
+   where beta0(x) = 1 - 16/x^2 + 53/(512*x^4)
+   and alpha0(x) = 1/(8*x) - 25/(384*x^3). */
+static float
+j0f_asympt (float x)
+{
+  /* The following code fails to give an error <= 9 ulps in only one case,
+     for which we tabulate the result. */
+  if (x == 0x1.4665d2p+24f)
+    return 0xa.50206p-52f;
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  double beta0 = 1.0f + y2 * (-0x1p-4f + 0x1.a8p-4 * y2);
+  double alpha0 = y * (0x2p-4 - 0x1.0aaaaap-4 * y2);
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  /* Subtract alpha0. */
+  h -= alpha0;
+  /* Now reduce mod pi/2. */
+  double piover2 = 0x1.921fb54442d18p+0;
+  int n = 0;
+  while (fabs (h) > piover2 / 2)
+    {
+      if (h > 0)
+        {
+          h -= piover2;
+          n ++;
+        }
+      else
+        {
+          h += piover2;
+          n --;
+        }
+    }
+  /* Now x - pi/4 - alpha0 = h + n*pi/2 mod (2*pi). */
+  float xr = (float) h;
+  n = n & 3;
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  float t = cst / sqrtf (x) * (float) beta0;
+  if (n == 0)
+    return t * cosf (xr);
+  else if (n == 2) /* cos(x+pi) = -cos(x) */
+    return -t * cosf (xr);
+  else if (n == 1) /* cos(x+pi/2) = -sin(x) */
+    return -t * sinf (xr);
+  else             /* cos(x+3pi/2) = sin(x) */
+    return t * sinf (xr);
+}
+
+/* Special code for x near a root of j0.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating j0 around its root.
+   For large x, we use an asymptotic formula (j0f_asympt). */
+static float
+j0f_near_root (float x, float z)
+{
+  float index_f;
+  int index;
+  index_f = roundf ((x - FIRST_ZERO_J0) / (float) M_PI);
+  /* j0f_asympt fails to give an error <= 9 ulps for x=0x1.324e92p+7 (index 48)
+     thus we can't reduce SMALL_SIZE below 49. */
+  if (index_f >= SMALL_SIZE)
+    return j0f_asympt (x);
+  index = (int) index_f;
+  float *p = Pj[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  return p[3] + y * (p[4] + y * (p[5] + y * p[6]));
+}
+
 float
 __ieee754_j0f(float x)
 {
@@ -51,36 +375,27 @@ __ieee754_j0f(float x)
 		__sincosf (x, &s, &c);
 		ss = s-c;
 		cc = s+c;
-		if(ix<0x7f000000) {  /* make sure x+x not overflow */
-		    z = -__cosf(x+x);
-		    if ((s*c)<zero) cc = z/ss;
-		    else	    ss = z/cc;
-		} else {
-		    /* We subtract (exactly) a value x0 such that
-		       cos(x0)+sin(x0) is very near to 0, and use the identity
-		       sin(x-x0) = sin(x)*cos(x0)-cos(x)*sin(x0) to get
-		       sin(x) + cos(x) with extra accuracy.  */
-		    float x0 = 0xe.d4108p+124f;
-		    float y = x - x0; /* exact  */
-		    /* sin(y) = sin(x)*cos(x0)-cos(x)*sin(x0)  */
-		    z = __sinf (y);
-		    float eps = 0x1.5f263ep-24f;
-		    /* cos(x0) ~ -sin(x0) + eps  */
-		    z += eps * __cosf (x);
-		    /* now z ~ (sin(x)-cos(x))*cos(x0)  */
-		    float cosx0 = -0xb.504f3p-4f;
-		    cc = z / cosx0;
-                }
+                if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+                  return j0f_asympt (x);
+                /* Now we are sure that x+x cannot overflow. */
+                z = -__cosf(x+x);
+                if ((s*c)<zero) cc = z/ss;
+                else	    ss = z/cc;
 	/*
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
 	 */
-		if(ix>0x5c000000) z = (invsqrtpi*cc)/sqrtf(x);
-		else {
-		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*cc-v*ss)/sqrtf(x);
-		}
-		return z;
+                if (ix <= 0x5c000000)
+                  {
+                    u = pzerof(x); v = qzerof(x);
+                    cc = u*cc-v*ss;
+                  }
+                z = (invsqrtpi * cc) / sqrtf(x);
+                float magic = 0x1.8p+20; /* 2^20 + 2^19 */
+                if (magic + cc != magic) /* Most likely. */
+                  return z;
+                else /* |cc| <= 2^-4 */
+                  return j0f_near_root (x, z);
 	}
 	if(ix<0x39000000) {	/* |x| < 2**-13 */
 	    math_force_eval(huge+x);		/* raise inexact if x != 0 */
@@ -112,6 +427,168 @@ v02  =  7.6006865129e-05, /* 0x389f65e0 */
 v03  =  2.5915085189e-07, /* 0x348b216c */
 v04  =  4.4111031494e-10; /* 0x2ff280c2 */
 
+#define FIRST_ZERO_Y0 0.893576f
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of y0 and degree-3 polynomial
+   approximations of y0 around these zeros: Py[0] for the first zero (0.893576),
+   Py[1] for the second one (3.957678), and so on.  Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.  Degree 3 is enough to get an error less than 4 ulps.
+*/
+static float Py[SMALL_SIZE][7] = {
+  /* For the first root we use a degree-4 polynomial since degree 3 is not enough,
+     where the coefficient of degree 4 is hard-coded in y0f_near_root(). */
+  { 0xd.bd613p-4, 0xe.4c176p-4, 0xe.e0897p-4, 0x3.274468p-28, 0xe.121b7p-4, -0x7.df8eap-4, 0x3.88cc2p-4 },
+  { 0x3.f2af8cp+0, 0x3.f52a68p+0, 0x3.fa1fa4p+0, 0xa.f1f83p-28, -0x6.70d098p-4, 0xd.04dc4p-8, 0xe.f2a5p-8 },
+  { 0x7.1464ap+0, 0x7.16077p+0, 0x7.191dap+0, -0x5.e2a51p-28, 0x4.cd3328p-4, -0x5.6bbb5p-8, -0xc.48cfbp-8 },
+  { 0xa.37ec2p+0, 0xa.38ebap+0, 0xa.3ad94p+0, -0x1.4b0aeep-24, -0x3.fec6b8p-4, 0x3.206ccp-8, 0xa.72401p-8 },
+  { 0xd.5bd7dp+0, 0xd.5c70ep+0, 0xd.5d94ap+0, -0x8.179d7p-28, 0x3.7e6544p-4, -0x2.178554p-8, -0x9.35f5bp-8 },
+  { 0x1.07f9aap+4, 0x1.0803c8p+4, 0x1.08170cp+4, -0x2.5b8078p-24, -0x3.24b868p-4, 0x1.86265ep-8, 0x8.51de2p-8 },
+  { 0x1.3a3d44p+4, 0x1.3a42cep+4, 0x1.3a4d8ap+4, 0x1.cd304ap-28, 0x2.e189ecp-4, -0x1.2c673ap-8, -0x7.a4726p-8 },
+  { 0x1.6c7d5ep+4, 0x1.6c833p+4, 0x1.6c99fp+4, -0x6.c63f1p-28, -0x2.acc9a8p-4, 0xf.077a1p-12, 0x7.1aba98p-8 },
+  { 0x1.9ebec4p+4, 0x1.9ec47p+4, 0x1.9ed016p+4, 0x1.e53838p-24, 0x2.81f2p-4, -0xc.61ccp-12, -0x6.aaa99p-8 },
+  { 0x1.d1008ep+4, 0x1.d10644p+4, 0x1.d10cf2p+4, 0x1.636feep-24, -0x2.5e40dcp-4, 0xa.6dfp-12, 0x6.4cd5a8p-8 },
+  { 0x2.0344f8p+4, 0x2.034884p+4, 0x2.034d4p+4, -0x4.04e1bp-28, 0x2.3febd8p-4, -0x8.f0ff9p-12, -0x5.fcd088p-8 },
+  { 0x2.358778p+4, 0x2.358b1p+4, 0x2.359224p+4, 0x3.6063d8p-24, -0x2.25baacp-4, 0x7.c6a088p-12, 0x5.b78ff8p-8 },
+  { 0x2.67ca1p+4, 0x2.67cdd8p+4, 0x2.67d434p+4, -0x3.f39ebcp-24, 0x2.0ed04cp-4, -0x6.d7eaf8p-12, -0x5.7aeaa8p-8 },
+  { 0x2.9a0d0cp+4, 0x2.9a10dp+4, 0x2.9a1828p+4, -0x4.ea23p-28, -0x1.fa8b4p-4, 0x6.158438p-12, 0x5.45324p-8 },
+  { 0x2.cc51ecp+4, 0x2.cc53e8p+4, 0x2.cc580cp+4, -0x3.5df0c8p-24, 0x1.e8727ep-4, -0x5.7460d8p-12, -0x5.1536p-8 },
+  { 0x2.fe94f8p+4, 0x2.fe972p+4, 0x2.fe9b18p+4, 0x1.1ef09ep-24, -0x1.d8293ap-4, 0x4.ed6058p-12, 0x4.e9fcc8p-8 },
+  { 0x3.30d8b8p+4, 0x3.30da7p+4, 0x3.30debcp+4, 0x1.b70cecp-24, 0x1.c967p-4, -0x4.7ad838p-12, -0x4.c2c9d8p-8 },
+  { 0x3.631b94p+4, 0x3.631ddp+4, 0x3.632244p+4, 0x1.abaadcp-24, -0x1.bbf246p-4, 0x4.187ba8p-12, 0x4.9f09f8p-8 },
+  { 0x3.955f7cp+4, 0x3.956144p+4, 0x3.9565fcp+4, 0x1.63989ep-24, 0x1.af9cb4p-4, -0x3.c397f8p-12, -0x4.7e4038p-8 },
+  { 0x3.c7a2bp+4, 0x3.c7a4c4p+4, 0x3.c7a878p+4, -0x1.68a8ecp-24, -0x1.a4407ep-4, 0x3.797fdcp-12, 0x4.600d3p-8 },
+  { 0x3.f9e62cp+4, 0x3.f9e85p+4, 0x3.f9ea7p+4, 0x1.e1bb5p-24, 0x1.99be74p-4, -0x3.38739cp-12, -0x4.441c5p-8 },
+  { 0x4.2c2a1p+4, 0x4.2c2be8p+4, 0x4.2c2e7p+4, -0x5.5bbcfp-24, -0x1.8ffc9ap-4, 0x2.ff0f5cp-12, 0x4.2a266p-8 },
+  { 0x4.5e6d98p+4, 0x4.5e6f8p+4, 0x4.5e71c8p+4, -0x4.9e34a8p-24, 0x1.86e51cp-4, -0x2.cba284p-12, -0x4.11f568p-8 },
+  { 0x4.90b17p+4, 0x4.90b328p+4, 0x4.90b5ap+4, 0x1.966706p-24, -0x1.7e657p-4, 0x2.9e0d44p-12, 0x3.fb56a4p-8 },
+  { 0x4.c2f59p+4, 0x4.c2f6d8p+4, 0x4.c2fac8p+4, 0x3.4b3b68p-24, 0x1.766dc2p-4, -0x2.752fbp-12, -0x3.e61fcp-8 },
+  { 0x4.f53968p+4, 0x4.f53a88p+4, 0x4.f53cb8p+4, 0x6.a99008p-28, -0x1.6ef07ep-4, 0x2.501294p-12, 0x3.d230ep-8 },
+  { 0x5.277dp+4, 0x5.277e4p+4, 0x5.278108p+4, -0x7.a9fa6p-32, 0x1.67e1dap-4, -0x2.2e9388p-12, -0x3.bf663cp-8 },
+  { 0x5.59c0e8p+4, 0x5.59c2p+4, 0x5.59c398p+4, -0x5.026808p-24, -0x1.613798p-4, 0x2.104558p-12, 0x3.ada76cp-8 },
+  { 0x5.8c0498p+4, 0x5.8c05cp+4, 0x5.8c0898p+4, 0x4.46aa2p-24, 0x1.5ae8c2p-4, -0x1.f474eep-12, -0x3.9cda1cp-8 },
+  { 0x5.be48dp+4, 0x5.be498p+4, 0x5.be4aap+4, 0x1.5cfccp-24, -0x1.54ed76p-4, 0x1.dad812p-12, 0x3.8cec8p-8 },
+  { 0x5.f08c08p+4, 0x5.f08d48p+4, 0x5.f08ecp+4, -0xb.4dc4cp-28, 0x1.4f3ebcp-4, -0x1.c38338p-12, -0x3.7dc9dp-8 },
+  { 0x6.22d05p+4, 0x6.22d11p+4, 0x6.22d428p+4, 0x3.f5343p-24, -0x1.49d668p-4, 0x1.ade97cp-12, 0x3.6f610cp-8 },
+  { 0x6.55137p+4, 0x6.5514ep+4, 0x6.551638p+4, -0x6.e6f32p-28, 0x1.44aefap-4, -0x1.9a2d3ep-12, -0x3.61a778p-8 },
+  { 0x6.8757e8p+4, 0x6.8758bp+4, 0x6.8759c8p+4, 0x1.f359c2p-28, -0x1.3fc386p-4, 0x1.87d25cp-12, 0x3.548be4p-8 },
+  { 0x6.b99bp+4, 0x6.b99c8p+4, 0x6.b99e2p+4, -0x2.9de748p-24, 0x1.3b0fa4p-4, -0x1.76b5aap-12, -0x3.48048p-8 },
+  { 0x6.ebdfb8p+4, 0x6.ebe058p+4, 0x6.ebe1bp+4, -0x2.24ccc8p-24, -0x1.368f5cp-4, 0x1.67061p-12, 0x3.3c0608p-8 },
+  { 0x7.1e2368p+4, 0x7.1e243p+4, 0x7.1e25bp+4, 0x4.7dcea8p-24, 0x1.323f16p-4, -0x1.5858c4p-12, -0x3.3087acp-8 },
+  { 0x7.50673p+4, 0x7.506808p+4, 0x7.5069ap+4, -0x4.b538p-24, -0x1.2e1b98p-4, 0x1.4a9624p-12, 0x3.258078p-8 },
+  { 0x7.82ab38p+4, 0x7.82abep+4, 0x7.82ad78p+4, 0x3.09dc4cp-24, 0x1.2a21ecp-4, -0x1.3da94p-12, -0x3.1ae88cp-8 },
+  { 0x7.b4eeep+4, 0x7.b4efb8p+4, 0x7.b4f158p+4, 0x4.d5a58p-28, -0x1.264f66p-4, 0x1.317f8cp-12, 0x3.10b8fcp-8 },
+  { 0x7.e732cp+4, 0x7.e73398p+4, 0x7.e73548p+4, 0x3.f4c44cp-24, 0x1.22a192p-4, -0x1.265128p-12, -0x3.06eb08p-8 },
+  { 0x8.1976bp+4, 0x8.19777p+4, 0x8.19783p+4, 0x2.4beae8p-24, -0x1.1f1634p-4, 0x1.1b7d2ap-12, 0x2.fd7934p-8 },
+  { 0x8.4bbbp+4, 0x8.4bbb5p+4, 0x8.4bbcep+4, -0xd.a581ep-28, 0x1.1bab3cp-4, -0x1.1186d6p-12, -0x2.f45cep-8 },
+  { 0x8.7dfe8p+4, 0x8.7dff3p+4, 0x8.7dffbp+4, 0xa.7c0bdp-28, -0x1.185eccp-4, 0x1.0819c2p-12, 0x2.eb92ccp-8 },
+  { 0x8.b042bp+4, 0x8.b0431p+4, 0x8.b043dp+4, -0x1.9452ecp-24, 0x1.152f26p-4, -0xf.f2b59p-16, -0x2.e314bp-8 },
+  { 0x8.e2868p+4, 0x8.e286fp+4, 0x8.e287cp+4, 0x3.83b7b8p-24, -0x1.121ab2p-4, 0xf.6b21p-16, 0x2.dadf34p-8 },
+  { 0x9.14ca8p+4, 0x9.14cadp+4, 0x9.14cbbp+4, -0x6.5ca3a8p-24, 0x1.0f1ff2p-4, -0xe.ea544p-16, -0x2.d2ee28p-8 },
+  { 0x9.470e6p+4, 0x9.470ecp+4, 0x9.470fp+4, -0x6.bb61ap-24, -0x1.0c3d8ap-4, 0xe.7833fp-16, 0x2.cb3e2cp-8 },
+  { 0x9.79524p+4, 0x9.7952ap+4, 0x9.79539p+4, 0x2.2438p-24, 0x1.097236p-4, -0xe.03747p-16, -0x2.c3cb48p-8 },
+  { 0x9.ab95cp+4, 0x9.ab968p+4, 0x9.ab971p+4, 0x3.1e0054p-24, -0x1.06bcc8p-4, 0xd.94272p-16, 0x2.bc9328p-8 },
+  { 0x9.ddd9fp+4, 0x9.ddda7p+4, 0x9.dddb5p+4, 0x7.46c908p-24, 0x1.041c28p-4, -0xd.320e5p-16, -0x2.b5920cp-8 },
+  { 0xa.101d7p+4, 0xa.101e5p+4, 0xa.101f3p+4, -0xb.4f158p-28, -0x1.018f52p-4, 0xc.cc7dfp-16, 0x2.aec5f4p-8 },
+  { 0xa.4261cp+4, 0xa.42623p+4, 0xa.4262bp+4, -0x6.5a89c8p-24, 0xf.f155p-8, -0xc.6b5cbp-16, -0x2.a82be4p-8 },
+  { 0xa.74a5cp+4, 0xa.74a62p+4, 0xa.74a6ap+4, -0x1.ef16c8p-24, -0xf.cad3fp-8, 0xc.16478p-16, 0x2.a1c1ap-8 },
+  { 0xa.a6e9bp+4, 0xa.a6eap+4, 0xa.a6ea9p+4, -0x6.1e7b68p-24, 0xf.a564cp-8, -0xb.bd1eap-16, -0x2.9b84f8p-8 },
+  { 0xa.d92d7p+4, 0xa.d92dfp+4, 0xa.d92eap+4, -0xf.8c858p-28, -0xf.80faep-8, 0xb.6f5dbp-16, 0x2.9573dcp-8 },
+  { 0xb.0b71ap+4, 0xb.0b71ep+4, 0xb.0b727p+4, 0x7.75d858p-24, 0xf.5d8abp-8, -0xb.24e88p-16, -0x2.8f8c4cp-8 },
+  { 0xb.3db58p+4, 0xb.3db5cp+4, 0xb.3db68p+4, 0x1.d78632p-24, -0xf.3b096p-8, 0xa.d5ef1p-16, 0x2.89cc8p-8 },
+  { 0xb.6ff96p+4, 0xb.6ff9bp+4, 0xb.6ffaap+4, 0x3.b24794p-24, 0xf.196c7p-8, -0xa.918e2p-16, -0x2.8432cp-8 },
+  { 0xb.a23d1p+4, 0xb.a23d9p+4, 0xb.a23e5p+4, 0x6.39cbc8p-24, -0xe.f8aa5p-8, 0xa.486fap-16, 0x2.7ebd9p-8 },
+  { 0xb.d481p+4, 0xb.d4818p+4, 0xb.d481cp+4, -0x1.820e3ap-24, 0xe.d8b9dp-8, -0xa.0973fp-16, -0x2.796b4cp-8 },
+  { 0xc.06c4fp+4, 0xc.06c57p+4, 0xc.06c5fp+4, -0x2.c7e038p-24, -0xe.b9925p-8, 0x9.cce94p-16, 0x2.743a5cp-8 },
+  { 0xc.3908ep+4, 0xc.39096p+4, 0xc.390a2p+4, 0x6.ab31c8p-24, 0xe.9b2bep-8, -0x9.92ad2p-16, -0x2.6f2994p-8 },
+  { 0xc.6b4cdp+4, 0xc.6b4d4p+4, 0xc.6b4d8p+4, 0x4.4ef25p-24, -0xe.7d7ecp-8, 0x9.5360fp-16, 0x2.6a37dp-8 }
+};
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+   y0(x) ~ sqrt(2/(pi*x))*beta0(x)*sin(x-pi/4-alpha0(x))
+   where beta0(x) = 1 - 16/x^2 + 53/(512*x^4)
+   and alpha0(x) = 1/(8*x) - 25/(384*x^3). */
+static float
+y0f_asympt (float x)
+{
+  /* The following code fails to give an error <= 9 ulps in only two cases,
+     for which we tabulate the correctly-rounded result. */
+  if (x == 0x1.bfad96p+7f)
+    return -0x7.f32bdp-32f;
+  if (x == 0x1.2e2a42p+17f)
+    return 0x1.a48974p-40f;
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  double beta0 = 1.0f + y2 * (-0x1p-4f + 0x1.a8p-4 * y2);
+  double alpha0 = y * (0x2p-4 - 0x1.0aaaaap-4 * y2);
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  /* Subtract alpha0. */
+  h -= alpha0;
+  /* Now reduce mod pi/2. */
+  double piover2 = 0x1.921fb54442d18p+0;
+  int n = 0;
+  while (fabs (h) > piover2 / 2)
+    {
+      if (h > 0)
+        {
+          h -= piover2;
+          n ++;
+        }
+      else
+        {
+          h += piover2;
+          n --;
+        }
+    }
+  /* Now x - pi/4 - alpha0 = h + n*pi/2 mod (2*pi). */
+  float xr = (float) h;
+  n = n & 3;
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  float t = cst / sqrtf (x) * (float) beta0;
+  if (n == 0)
+    return t * sinf (xr);
+  else if (n == 2) /* sin(x+pi) = -sin(x) */
+    return -t * sinf (xr);
+  else if (n == 1) /* sin(x+pi/2) = cos(x) */
+    return t * cosf (xr);
+  else             /* sin(x+3pi/2) = -cos(x) */
+    return -t * cosf (xr);
+}
+
+/* Special code for x near a root of y0.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating y0 around its root.
+   For large x, we use an asymptotic formula (y0f_asympt). */
+static float
+y0f_near_root (float x, float z)
+{
+  float index_f;
+  int index;
+  index_f = roundf ((x - FIRST_ZERO_Y0) / (float) M_PI);
+  if (index_f >= SMALL_SIZE)
+    return y0f_asympt (x);
+  index = (int) index_f;
+  float *p = Py[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  /* For degree 0 we use a degree-4 polynomial, where the coefficient of degree 4
+     is hard-coded here. */
+  float p6 = (index > 0) ? p[6] : p[6] + y * -0x3.9ce95p-4;
+  return p[3] + y * (p[4] + y * (p[5] + y * p6));
+}
+
 float
 __ieee754_y0f(float x)
 {
@@ -124,7 +601,8 @@ __ieee754_y0f(float x)
 	if(ix>=0x7f800000) return  one/(x+x*x);
 	if(ix==0) return -1/zero; /* -inf and divide by zero exception.  */
 	if(hx<0) return zero/(zero*x);
-	if(ix >= 0x40000000) {  /* |x| >= 2.0 */
+	if(ix >= 0x40000000 || (0x3f5bd613 <= ix && ix <= 0x3f6e0897)) {
+          /* |x| >= 2.0 or 0x1.b7ac26p-1 <= |x| <= 0x1.dc112ep-1 (around 1st zero) */
 	/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
 	 * where x0 = x-pi/4
 	 *      Better formula:
@@ -143,17 +621,23 @@ __ieee754_y0f(float x)
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
 	 */
-		if(ix<0x7f000000) {  /* make sure x+x not overflow */
-		    z = -__cosf(x+x);
-		    if ((s*c)<zero) cc = z/ss;
-		    else            ss = z/cc;
-		}
-		if(ix>0x5c000000) z = (invsqrtpi*ss)/sqrtf(x);
-		else {
-		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
-		}
-		return z;
+                if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+                  return y0f_asympt (x);
+                /* Now we are sure that x+x cannot overflow. */
+                z = -__cosf(x+x);
+                if ((s*c)<zero) cc = z/ss;
+                else            ss = z/cc;
+                if (ix <= 0x5c000000)
+                  {
+                    u = pzerof(x); v = qzerof(x);
+                    ss = u*ss+v*cc;
+                  }
+                z = (invsqrtpi*ss)/sqrtf(x);
+                float magic = 0x1.8p+20; /* 2^20 + 2^19 */
+                if (magic + ss != magic) /* Most likely. */
+                  return z;
+                else /* |cc| <= 2^-4 */
+                  return y0f_near_root (x, z);
 	}
 	if(ix<=0x39800000) {	/* x < 2**-13 */
 	    return(u00 + tpi*__ieee754_logf(x));
@@ -165,7 +649,7 @@ __ieee754_y0f(float x)
 }
 libm_alias_finite (__ieee754_y0f, __y0f)
 
-/* The asymptotic expansions of pzero is
+/* The asymptotic expansion of pzero is
  *	1 - 9/128 s^2 + 11025/98304 s^4 - ...,	where s = 1/x.
  * For x >= 2, We approximate pzero by
  *	pzero(x) = 1 + (R/S)
@@ -257,7 +741,7 @@ pzerof(float x)
 }
 
 
-/* For x >= 8, the asymptotic expansions of qzero is
+/* For x >= 8, the asymptotic expansion of qzero is
  *	-1/8 s + 75/1024 s^3 - ..., where s = 1/x.
  * We approximate pzero by
  *	qzero(x) = s*(-1.25 + (R/S))
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 633d2ab8e4..425e1cc881 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1312,22 +1312,22 @@ float128: 1
 ldouble: 1
 
 Function: "j0":
-double: 2
-float: 8
-float128: 2
+double: 5
+float: 9
+float128: 4
 ldouble: 2
 
 Function: "j0_downward":
 double: 2
-float: 4
+float: 9
 float128: 4
 ldouble: 6
 
 Function: "j0_towardzero":
-double: 5
-float: 6
+double: 9
+float: 7
 float128: 4
-ldouble: 6
+ldouble: 9
 
 Function: "j0_upward":
 double: 4
@@ -1748,10 +1748,10 @@ float128: 4
 ldouble: 5
 
 Function: "y0":
-double: 3
-float: 8
-float128: 3
-ldouble: 1
+double: 8
+float: 9
+float128: 4
+ldouble: 2
 
 Function: "y0_downward":
 double: 3
@@ -1760,15 +1760,15 @@ float128: 4
 ldouble: 5
 
 Function: "y0_towardzero":
-double: 3
+double: 5
 float: 3
 float128: 3
-ldouble: 6
+ldouble: 7
 
 Function: "y0_upward":
 double: 3
 float: 6
-float128: 3
+float128: 7
 ldouble: 5
 
 Function: "y1":
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-01-19 10:28 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-18  7:13 [PATCH] Fix the inaccuracy of j0f (BZ 14469) Paul Zimmermann
2021-01-18 20:26 ` Joseph Myers
2021-01-19 10:28   ` [PATCH] Fix the inaccuracy of j0f (BZ 14469) and y0f (BZ 14471) Paul Zimmermann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).