This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 5/7] Create and use libc_feupdateenv_test.


We can reduce the number of STMXCSR instructions executed, and we can
often avoid the call to __feraiseexcept.

	* sysdeps/generic/math_private.h (default_libc_feupdateenv_test): New.
	(libc_feupdateenv_test, libc_feupdateenv_testf): New.
	(libc_feupdateenv_testl): New.
	* sysdeps/x86_64/fpu/math_private.h (libc_feupdateenv_test): New.
	(libc_feupdateenv_testf): New.
	(libc_feupdateenv): Use libc_feupdateenv_test.
	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use libc_feupdateenv_test.
	* sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Likewise.
---
 sysdeps/generic/math_private.h    |   18 ++++++++++++++++++
 sysdeps/ieee754/dbl-64/s_fma.c    |   25 +++++++++++++------------
 sysdeps/ieee754/dbl-64/s_fmaf.c   |   12 +++++++++---
 sysdeps/x86_64/fpu/math_private.h |   28 ++++++++++++++++++++++++----
 4 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index 0b945f9..813ad93 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -457,6 +457,24 @@ default_libc_feupdateenv (fenv_t *e)
 # define libc_feupdateenv_53bit libc_feupdateenv
 #endif
 
+static __always_inline int
+default_libc_feupdateenv_test (fenv_t *e, int ex)
+{
+  int ret = fetestexcept (ex);  /* Sample EX before feupdateenv runs.  */
+  feupdateenv (e);  /* Hand E to the standard feupdateenv.  */
+  return ret;  /* The fetestexcept result taken above.  */
+}
+
+#ifndef libc_feupdateenv_test
+# define libc_feupdateenv_test  default_libc_feupdateenv_test
+#endif
+#ifndef libc_feupdateenv_testf
+# define libc_feupdateenv_testf default_libc_feupdateenv_test
+#endif
+#ifndef libc_feupdateenv_testl
+# define libc_feupdateenv_testl default_libc_feupdateenv_test
+#endif
+
 /* Save and set the rounding mode.  The use of fenv_t to store the old mode
    allows a target-specific version of this function to avoid converting the
    rounding mode from the fpu format.  By default we have no choice but to
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index a27e246..ab20a80 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -149,35 +149,36 @@ __fma (double x, double y, double z)
 
   fenv_t env;
   libc_feholdexcept_setround (&env, FE_TOWARDZERO);
+
   /* Perform m2 + a2 addition with round to odd.  */
   u.d = a2 + m2;
 
+  if (__builtin_expect (adjust < 0, 0))
+    {
+      if ((u.ieee.mantissa1 & 1) == 0)
+	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+      v.d = a1 + u.d;
+    }
+
+  /* Reset rounding mode and test for inexact simultaneously.  */
+  int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
+
   if (__builtin_expect (adjust == 0, 1))
     {
       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
+	u.ieee.mantissa1 |= j;
       /* Result is a1 + u.d.  */
       return a1 + u.d;
     }
   else if (__builtin_expect (adjust > 0, 1))
     {
       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
+	u.ieee.mantissa1 |= j;
       /* Result is a1 + u.d, scaled up.  */
       return (a1 + u.d) * 0x1p53;
     }
   else
     {
-      if ((u.ieee.mantissa1 & 1) == 0)
-	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-      v.d = a1 + u.d;
-      int j = libc_fetestexcept (FE_INEXACT) != 0;
-      libc_feupdateenv (&env);
-      /* Ensure the following computations are performed in default rounding
-	 mode instead of just reusing the round to zero computation.  */
-      asm volatile ("" : "=m" (u) : "m" (u));
       /* If a1 + u.d is exact, the only rounding happens during
 	 scaling down.  */
       if (j == 0)
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c
index 00cd382..7a939aa 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/dbl-64/s_fmaf.c
@@ -35,12 +35,18 @@ __fmaf (float x, float y, float z)
   /* Multiplication is always exact.  */
   double temp = (double) x * (double) y;
   union ieee754_double u;
-  libc_feholdexcept_setroundf (&env, FE_TOWARDZERO);
+
+  libc_feholdexcept_setround (&env, FE_TOWARDZERO);
+
   /* Perform addition with round to odd.  */
   u.d = temp + (double) z;
+
+  /* Reset rounding mode and test for inexact simultaneously.  */
+  int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
+
   if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-    u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
-  libc_feupdateenv (&env);
+    u.ieee.mantissa1 |= j;
+
   /* And finally truncation with round to nearest.  */
   return (float) u.d;
 }
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 3289afc..aa208b2 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -108,13 +108,33 @@ libc_fesetenv (fenv_t *e)
 #define libc_fesetenv  libc_fesetenv
 #define libc_fesetenvf libc_fesetenv
 
+static __always_inline int
+libc_feupdateenv_test (fenv_t *e, int ex)
+{
+  unsigned int mxcsr, old_mxcsr, cur_ex;
+  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+  cur_ex = mxcsr & FE_ALL_EXCEPT;
+
+  /* Merge current exceptions with the old environment.  */
+  old_mxcsr = e->__mxcsr;
+  mxcsr = old_mxcsr | cur_ex;
+  asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+
+  /* Raise SIGFPE for any new exceptions that E leaves unmasked (a set
+     mask bit, 7 above its flag, means masked: the expected case).  */
+  if (__builtin_expect (~(old_mxcsr >> 7) & cur_ex, 0))
+    __feraiseexcept (cur_ex);
+
+  /* Report which of the EX exceptions were raised since the hold.  */
+  return cur_ex & ex;
+}
+#define libc_feupdateenv_test  libc_feupdateenv_test
+#define libc_feupdateenv_testf libc_feupdateenv_test
+
 static __always_inline void
 libc_feupdateenv (fenv_t *e)
 {
-  unsigned int mxcsr;
-  asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
-  asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));
-  __feraiseexcept (mxcsr & FE_ALL_EXCEPT);
+  libc_feupdateenv_test (e, 0);  /* EX is 0; the test result is unused.  */
 }
 #define libc_feupdateenv  libc_feupdateenv
 #define libc_feupdateenvf libc_feupdateenv
-- 
1.7.7.6


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]