* [PATCH v3 6/7] i386: Move hypot implementation to C
2021-11-01 20:20 [PATCH v3 0/7] Improve hypot() Adhemerval Zanella via Libc-alpha
@ 2021-11-01 20:20 ` Adhemerval Zanella via Libc-alpha
0 siblings, 0 replies; 2+ messages in thread
From: Adhemerval Zanella via Libc-alpha @ 2021-11-01 20:20 UTC (permalink / raw)
To: libc-alpha; +Cc: Wilco Dijkstra
The generic hypotf is slight slower, mostly due the tricks the assembly
does to optimize the isinf/isnan/issignaling. The generic hypot is way
slower, since the optimized implementation uses the i386 default
excessive precision to issue the operation directly. A similar
implementation is provided instead of using the generic implementation:
Checked on i686-linux-gnu.
---
sysdeps/i386/fpu/e_hypot.S | 75 -------------------------------------
sysdeps/i386/fpu/e_hypot.c | 46 +++++++++++++++++++++++
sysdeps/i386/fpu/e_hypotf.S | 64 -------------------------------
3 files changed, 46 insertions(+), 139 deletions(-)
delete mode 100644 sysdeps/i386/fpu/e_hypot.S
create mode 100644 sysdeps/i386/fpu/e_hypot.c
delete mode 100644 sysdeps/i386/fpu/e_hypotf.S
diff --git a/sysdeps/i386/fpu/e_hypot.S b/sysdeps/i386/fpu/e_hypot.S
deleted file mode 100644
index f2c956b77a..0000000000
--- a/sysdeps/i386/fpu/e_hypot.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Compute the hypothenuse of X and Y.
- Copyright (C) 1998-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_DBL_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%edx)
-#else
-# define MO(op) op
-#endif
-
- .text
-ENTRY(__ieee754_hypot)
-#ifdef PIC
- LOAD_PIC_REG (dx)
-#endif
- fldl 4(%esp) // x
- fxam
- fnstsw
- fldl 12(%esp) // y : x
- movb %ah, %ch
- fxam
- fnstsw
- movb %ah, %al
- orb %ch, %ah
- sahf
- jc 1f
- fmul %st(0) // y * y : x
- fxch // x : y * y
- fmul %st(0) // x * x : y * y
- faddp // x * x + y * y
- fsqrt
- DBL_NARROW_EVAL_UFLOW_NONNEG
-2: ret
-
- // We have to test whether any of the parameters is Inf.
- // In this case the result is infinity.
-1: andb $0x45, %al
- cmpb $5, %al
- je 3f // jump if y is Inf
- andb $0x45, %ch
- cmpb $5, %ch
- jne 4f // jump if x is not Inf
- fxch
-3: fstp %st(1)
- fabs
- jmp 2b
-
-4: testb $1, %al
- jnz 5f // y is NaN
- fxch
-5: fstp %st(1)
- jmp 2b
-
-END(__ieee754_hypot)
-libm_alias_finite (__ieee754_hypot, __hypot)
diff --git a/sysdeps/i386/fpu/e_hypot.c b/sysdeps/i386/fpu/e_hypot.c
new file mode 100644
index 0000000000..f2c9a8f1b9
--- /dev/null
+++ b/sysdeps/i386/fpu/e_hypot.c
@@ -0,0 +1,46 @@
+/* Euclidean distance function. Double/Binary64 i386 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_private.h>
+#include <math-underflow.h>
+#include <math-narrow-eval.h>
+#include <libm-alias-finite.h>
+#include <math_config.h>
+
+/* The i386 allows to use the default excess of precision to optimize the
+ hypot implementation, since internal multiplication and sqrt is carried
+ with 80-bit FP type. */
+double
+__ieee754_hypot (double x, double y)
+{
+ if (!isfinite (x) || !isfinite (y))
+ {
+ if ((isinf (x) || isinf (y))
+ && !issignaling (x) && !issignaling (y))
+ return INFINITY;
+ return x + y;
+ }
+
+ long double lx = x;
+ long double ly = y;
+ double r = sqrtl (math_narrow_eval (lx * lx + ly * ly));
+ math_check_force_underflow_nonneg (r);
+ return r;
+}
+libm_alias_finite (__ieee754_hypot, __hypot)
diff --git a/sysdeps/i386/fpu/e_hypotf.S b/sysdeps/i386/fpu/e_hypotf.S
deleted file mode 100644
index cec5d15403..0000000000
--- a/sysdeps/i386/fpu/e_hypotf.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Compute the hypothenuse of X and Y.
- Copyright (C) 1998-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
- .text
-ENTRY(__ieee754_hypotf)
- flds 4(%esp) // x
- fxam
- fnstsw
- flds 8(%esp) // y : x
- movb %ah, %ch
- fxam
- fnstsw
- movb %ah, %al
- orb %ch, %ah
- sahf
- jc 1f
- fmul %st(0) // y * y : x
- fxch // x : y * y
- fmul %st(0) // x * x : y * y
- faddp // x * x + y * y
- fsqrt
- FLT_NARROW_EVAL
-2: ret
-
- // We have to test whether any of the parameters is Inf.
- // In this case the result is infinity.
-1: andb $0x45, %al
- cmpb $5, %al
- je 3f // jump if y is Inf
- andb $0x45, %ch
- cmpb $5, %ch
- jne 4f // jump if x is not Inf
- fxch
-3: fstp %st(1)
- fabs
- jmp 2b
-
-4: testb $1, %al
- jnz 5f // y is NaN
- fxch
-5: fstp %st(1)
- jmp 2b
-
-END(__ieee754_hypotf)
-libm_alias_finite (__ieee754_hypotf, __hypotf)
--
2.32.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH v3 6/7] i386: Move hypot implementation to C
@ 2021-11-22 14:47 Wilco Dijkstra via Libc-alpha
0 siblings, 0 replies; 2+ messages in thread
From: Wilco Dijkstra via Libc-alpha @ 2021-11-22 14:47 UTC (permalink / raw)
To: Adhemerval Zanella; +Cc: 'GNU C Library'
Hi Adhemerval,
One more comment on this version:
+ long double lx = x;
+ long double ly = y;
+ double r = sqrtl (math_narrow_eval (lx * lx + ly * ly));
+ math_check_force_underflow_nonneg (r);
+ return r;
I think Joseph meant something like:
double r = math_narrow_eval ((double) sqrtl (lx * lx + ly * ly));
The narrow eval macro forces the result to memory to remove excess
precision but it only works if it has the narrower type already.
Cheers,
Wilco
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-11-22 14:48 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-22 14:47 [PATCH v3 6/7] i386: Move hypot implementation to C Wilco Dijkstra via Libc-alpha
-- strict thread matches above, loose matches on Subject: below --
2021-11-01 20:20 [PATCH v3 0/7] Improve hypot() Adhemerval Zanella via Libc-alpha
2021-11-01 20:20 ` [PATCH v3 6/7] i386: Move hypot implementation to C Adhemerval Zanella via Libc-alpha
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).