unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix
@ 2019-09-10 18:15 Paul A. Clarke
  2019-09-10 22:06 ` Paul E Murphy
  0 siblings, 1 reply; 4+ messages in thread
From: Paul A. Clarke @ 2019-09-10 18:15 UTC (permalink / raw)
  To: libc-alpha; +Cc: tuliom, murphyp

From: "Paul A. Clarke" <pc@us.ibm.com>

SET_RESTORE_ROUND brackets a block of code, temporarily setting and
restoring the rounding mode and letting everything else, including
exceptions generated within the block, pass through.

On powerpc, the current code clears the exception enables, which will hide
exceptions generated within the block.  This issue was introduced by me
in commit e905212627350d54b58426214b5a54ddc852b0c9.

Fix this by not clearing exception enable bits in the prologue.

Also, since we are no longer changing the enable bits in either the
prologue or the epilogue, there is no need to test for entering/exiting
non-stop mode.

Also, optimize the prologue get/save/set rounding mode operations for
POWER9 and later by using 'mffscrn' when possible.

Fixes: e905212627350d54b58426214b5a54ddc852b0c9

2019-09-10  Paul A. Clarke  <pc@us.ibm.com>

	* sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New.
	(__fe_mffscrn): New.
	* sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx):
	Do not clear enable bits, remove obsolete code, use
	fegetenv_and_set_rn.
	(libc_feresetround_ppc): Remove obsolete code, use
	fegetenv_and_set_rn.
---
 sysdeps/powerpc/fpu/fenv_libc.h    | 32 ++++++++++++++++++++++++++++++++
 sysdeps/powerpc/fpu/fenv_private.h | 23 ++++-------------------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index 0aad897..3173bc2 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -48,6 +48,38 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
     __fr;								\
   })
 
+#define __fe_mffscrn(rn)						\
+  ({register fenv_union_t __fr;						\
+    if (__builtin_constant_p (rn))					\
+      __asm__ __volatile__ (						\
+        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
+        : "=f" (__fr.fenv) : "i" (rn));					\
+    else								\
+    {									\
+      __fr.l = (rn);							\
+      __asm__ __volatile__ (						\
+        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
+        : "=f" (__fr.fenv) : "f" (__fr.fenv));				\
+    }									\
+    __fr.fenv;								\
+  })
+
+/* Like fegetenv_status, but also sets the rounding mode.  */
+#ifdef _ARCH_PWR9
+#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
+#else
+/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
+   but not sufficient, because it does not set the rounding mode.
+   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
+#define fegetenv_and_set_rn(rn)						\
+  ({register fenv_union_t __fr;						\
+    __fr.fenv = __fe_mffscrn (rn);					\
+    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))	\
+      __fesetround_inline (rn);						\
+    __fr.fenv;								\
+  })
+#endif
+
 /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
 #define fesetenv_register(env) \
 	do { \
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
index 3286b4e..504f7b8 100644
--- a/sysdeps/powerpc/fpu/fenv_private.h
+++ b/sysdeps/powerpc/fpu/fenv_private.h
@@ -101,11 +101,7 @@ static __always_inline void
 libc_feresetround_ppc (fenv_t *envp)
 {
   fenv_union_t new = { .fenv = *envp };
-
-  __TEST_AND_EXIT_NON_STOP (-1ULL, new.l);
-
-  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
-  fesetenv_mode (new.fenv);
+  fegetenv_and_set_rn (new.l & FPSCR_RN_MASK);
 }
 
 static __always_inline int
@@ -147,21 +143,10 @@ libc_feupdateenv_ppc (fenv_t *e)
 static __always_inline void
 libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
 {
-  fenv_union_t old, new;
-
-  old.fenv = fegetenv_status ();
+  fenv_union_t old;
 
-  new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
-
-  ctx->env = old.fenv;
-  if (__glibc_unlikely (new.l != old.l))
-    {
-      __TEST_AND_ENTER_NON_STOP (old.l, 0ULL);
-      fesetenv_mode (new.fenv);
-      ctx->updated_status = true;
-    }
-  else
-    ctx->updated_status = false;
+  ctx->env = old.fenv = fegetenv_and_set_rn (r);
+  ctx->updated_status = (r != (old.l & FPSCR_RN_MASK));
 }
 
 static __always_inline void
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix
  2019-09-10 18:15 [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix Paul A. Clarke
@ 2019-09-10 22:06 ` Paul E Murphy
  2019-09-11 20:34   ` Paul Clarke
  0 siblings, 1 reply; 4+ messages in thread
From: Paul E Murphy @ 2019-09-10 22:06 UTC (permalink / raw)
  To: Paul A. Clarke, libc-alpha; +Cc: tuliom



On 9/10/19 1:15 PM, Paul A. Clarke wrote:
> From: "Paul A. Clarke" <pc@us.ibm.com>
> 
> SET_RESTORE_ROUND brackets a block of code, temporarily setting and
> restoring the rounding mode and letting everything else, including
> exceptions generated within the block, pass through.
> 
> On powerpc, the current code clears the exception enables, which will hide
> exceptions generated within the block.  This issue was introduced by me
> in commit e905212627350d54b58426214b5a54ddc852b0c9.
> 
> Fix this by not clearing exception enable bits in the prologue.
> 
> Also, since we are no longer changing the enable bits in either the
> prologue or the epilogue, there is no need to test for entering/exiting
> non-stop mode.
> 
> Also, optimize the prologue get/save/set rounding mode operations for
> POWER9 and later by using 'mffscrn' when possible.
> 
> Fixes: e905212627350d54b58426214b5a54ddc852b0c9
> 
> 2019-09-10  Paul A. Clarke  <pc@us.ibm.com>
> 
> 	* sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New.
> 	(__fe_mffscrn): New.
> 	* sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx):
> 	Do not clear enable bits, remove obsolete code, use
> 	fegetenv_and_set_rn.
> 	(libc_feresetround_ppc): Remove obsolete code, use
> 	fegetenv_and_set_rn.
> ---
>   sysdeps/powerpc/fpu/fenv_libc.h    | 32 ++++++++++++++++++++++++++++++++
>   sysdeps/powerpc/fpu/fenv_private.h | 23 ++++-------------------
>   2 files changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index 0aad897..3173bc2 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -48,6 +48,38 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>       __fr;								\
>     })
>   
> +#define __fe_mffscrn(rn)						\
> +  ({register fenv_union_t __fr;						\
> +    if (__builtin_constant_p (rn))					\
> +      __asm__ __volatile__ (						\
> +        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
> +        : "=f" (__fr.fenv) : "i" (rn));					\
> +    else								\
> +    {									\
> +      __fr.l = (rn);							\
> +      __asm__ __volatile__ (						\
> +        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
> +        : "=f" (__fr.fenv) : "f" (__fr.fenv));				\
> +    }									\
> +    __fr.fenv;								\
> +  })
OK

> +
> +/* Like fegetenv_status, but also sets the rounding mode.  */
> +#ifdef _ARCH_PWR9
> +#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
> +#else
> +/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
> +   but not sufficient, because it does not set the rounding mode.
> +   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
> +#define fegetenv_and_set_rn(rn)						\
> +  ({register fenv_union_t __fr;						\
> +    __fr.fenv = __fe_mffscrn (rn);					\
> +    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))	\
> +      __fesetround_inline (rn);						\
> +    __fr.fenv;								\
> +  })
> +#endif
Is the conditional code faster than unconditionally calling 
__fesetround_inline?

> +
>   /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
>   #define fesetenv_register(env) \
>   	do { \
> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
> index 3286b4e..504f7b8 100644
> --- a/sysdeps/powerpc/fpu/fenv_private.h
> +++ b/sysdeps/powerpc/fpu/fenv_private.h
> @@ -101,11 +101,7 @@ static __always_inline void
>   libc_feresetround_ppc (fenv_t *envp)
>   {
>     fenv_union_t new = { .fenv = *envp };
> -
> -  __TEST_AND_EXIT_NON_STOP (-1ULL, new.l);
> -
> -  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
> -  fesetenv_mode (new.fenv);
> +  fegetenv_and_set_rn (new.l & FPSCR_RN_MASK);
>   }
OK


>   
>   static __always_inline int
> @@ -147,21 +143,10 @@ libc_feupdateenv_ppc (fenv_t *e)
>   static __always_inline void
>   libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
>   {
> -  fenv_union_t old, new;
> -
> -  old.fenv = fegetenv_status ();
> +  fenv_union_t old;
>   
> -  new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
> -
> -  ctx->env = old.fenv;
> -  if (__glibc_unlikely (new.l != old.l))
> -    {
> -      __TEST_AND_ENTER_NON_STOP (old.l, 0ULL);
> -      fesetenv_mode (new.fenv);
> -      ctx->updated_status = true;
> -    }
> -  else
> -    ctx->updated_status = false;
> +  ctx->env = old.fenv = fegetenv_and_set_rn (r);
> +  ctx->updated_status = (r != (old.l & FPSCR_RN_MASK))OK

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix
  2019-09-10 22:06 ` Paul E Murphy
@ 2019-09-11 20:34   ` Paul Clarke
  2019-09-11 21:26     ` Paul E Murphy
  0 siblings, 1 reply; 4+ messages in thread
From: Paul Clarke @ 2019-09-11 20:34 UTC (permalink / raw)
  To: Paul E Murphy, libc-alpha; +Cc: tuliom

On 9/10/19 5:06 PM, Paul E Murphy wrote:
> On 9/10/19 1:15 PM, Paul A. Clarke wrote:
>> SET_RESTORE_ROUND brackets a block of code, temporarily setting and
>> restoring the rounding mode and letting everything else, including
>> exceptions generated within the block, pass through.
>>
>> On powerpc, the current code clears the exception enables, which will hide
>> exceptions generated within the block.  This issue was introduced by me
>> in commit e905212627350d54b58426214b5a54ddc852b0c9.
>>
>> Fix this by not clearing exception enable bits in the prologue.
>>
>> Also, since we are no longer changing the enable bits in either the
>> prologue or the epilogue, there is no need to test for entering/exiting
>> non-stop mode.
>>
>> Also, optimize the prologue get/save/set rounding mode operations for
>> POWER9 and later by using 'mffscrn' when possible.
>>
>> Fixes: e905212627350d54b58426214b5a54ddc852b0c9
>>
>> 2019-09-10  Paul A. Clarke  <pc@us.ibm.com>
>>
>>     * sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_and_set_rn): New.
>>     (__fe_mffscrn): New.
>>     * sysdeps/powerpc/fpu/fenv_private.h (libc_feholdsetround_ppc_ctx):
>>     Do not clear enable bits, remove obsolete code, use
>>     fegetenv_and_set_rn.
>>     (libc_feresetround_ppc): Remove obsolete code, use
>>     fegetenv_and_set_rn.
>> ---
>>   sysdeps/powerpc/fpu/fenv_libc.h    | 32 ++++++++++++++++++++++++++++++++
>>   sysdeps/powerpc/fpu/fenv_private.h | 23 ++++-------------------
>>   2 files changed, 36 insertions(+), 19 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
>> index 0aad897..3173bc2 100644
>> --- a/sysdeps/powerpc/fpu/fenv_libc.h
>> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
>> @@ -48,6 +48,38 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>>       __fr;                                \
>>     })
>>   +#define __fe_mffscrn(rn)                        \
>> +  ({register fenv_union_t __fr;                        \
>> +    if (__builtin_constant_p (rn))                    \
>> +      __asm__ __volatile__ (                        \
>> +        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
>> +        : "=f" (__fr.fenv) : "i" (rn));                    \
>> +    else                                \
>> +    {                                    \
>> +      __fr.l = (rn);                            \
>> +      __asm__ __volatile__ (                        \
>> +        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
>> +        : "=f" (__fr.fenv) : "f" (__fr.fenv));                \
>> +    }                                    \
>> +    __fr.fenv;                                \
>> +  })
> OK
> 
>> +
>> +/* Like fegetenv_status, but also sets the rounding mode.  */
>> +#ifdef _ARCH_PWR9
>> +#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
>> +#else
>> +/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
>> +   but not sufficient, because it does not set the rounding mode.
>> +   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
>> +#define fegetenv_and_set_rn(rn)                        \
>> +  ({register fenv_union_t __fr;                        \
>> +    __fr.fenv = __fe_mffscrn (rn);                    \
>> +    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))    \
>> +      __fesetround_inline (rn);                        \
>> +    __fr.fenv;                                \
>> +  })
>> +#endif

> Is the conditional code faster than unconditionally calling __fesetround_inline?

My measurements indicate that unconditionally calling _fesetround_inline is a bit slower than with the condition (on POWER9, where the condition will always fail).

>> +
>>   /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
>>   #define fesetenv_register(env) \
>>       do { \
>> diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
>> index 3286b4e..504f7b8 100644
>> --- a/sysdeps/powerpc/fpu/fenv_private.h
>> +++ b/sysdeps/powerpc/fpu/fenv_private.h
>> @@ -101,11 +101,7 @@ static __always_inline void
>>   libc_feresetround_ppc (fenv_t *envp)
>>   {
>>     fenv_union_t new = { .fenv = *envp };
>> -
>> -  __TEST_AND_EXIT_NON_STOP (-1ULL, new.l);
>> -
>> -  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
>> -  fesetenv_mode (new.fenv);
>> +  fegetenv_and_set_rn (new.l & FPSCR_RN_MASK);
>>   }
> OK
> 
> 
>>     static __always_inline int
>> @@ -147,21 +143,10 @@ libc_feupdateenv_ppc (fenv_t *e)
>>   static __always_inline void
>>   libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
>>   {
>> -  fenv_union_t old, new;
>> -
>> -  old.fenv = fegetenv_status ();
>> +  fenv_union_t old;
>>   -  new.l = (old.l & ~(FPSCR_ENABLES_MASK|FPSCR_RN_MASK)) | r;
>> -
>> -  ctx->env = old.fenv;
>> -  if (__glibc_unlikely (new.l != old.l))
>> -    {
>> -      __TEST_AND_ENTER_NON_STOP (old.l, 0ULL);
>> -      fesetenv_mode (new.fenv);
>> -      ctx->updated_status = true;
>> -    }
>> -  else
>> -    ctx->updated_status = false;
>> +  ctx->env = old.fenv = fegetenv_and_set_rn (r);
>> +  ctx->updated_status = (r != (old.l & FPSCR_RN_MASK))OK

PC

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix
  2019-09-11 20:34   ` Paul Clarke
@ 2019-09-11 21:26     ` Paul E Murphy
  0 siblings, 0 replies; 4+ messages in thread
From: Paul E Murphy @ 2019-09-11 21:26 UTC (permalink / raw)
  To: Paul Clarke, libc-alpha; +Cc: tuliom



On 9/11/19 3:34 PM, Paul Clarke wrote:
> On 9/10/19 5:06 PM, Paul E Murphy wrote:
>> On 9/10/19 1:15 PM, Paul A. Clarke wrote:
>>> +
>>> +/* Like fegetenv_status, but also sets the rounding mode.  */
>>> +#ifdef _ARCH_PWR9
>>> +#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
>>> +#else
>>> +/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
>>> +   but not sufficient, because it does not set the rounding mode.
>>> +   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
>>> +#define fegetenv_and_set_rn(rn)                        \
>>> +  ({register fenv_union_t __fr;                        \
>>> +    __fr.fenv = __fe_mffscrn (rn);                    \
>>> +    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))    \
>>> +      __fesetround_inline (rn);                        \
>>> +    __fr.fenv;                                \
>>> +  })
>>> +#endif
> 
>> Is the conditional code faster than unconditionally calling __fesetround_inline?
> 
> My measurements indicate that unconditionally calling _fesetround_inline is a bit slower than with the condition (on POWER9, where the condition will always fail).
> 

OK, thanks. LGTM.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-09-11 21:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-10 18:15 [PATCH] [powerpc] SET_RESTORE_ROUND optimizations and bug fix Paul A. Clarke
2019-09-10 22:06 ` Paul E Murphy
2019-09-11 20:34   ` Paul Clarke
2019-09-11 21:26     ` Paul E Murphy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).