This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.12-191-g3e692e0


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  3e692e0518b4f4679352d25102bd47cf3f85c592 (commit)
       via  f3f7372de1401b99f0a318ce09caf73e42d6f022 (commit)
      from  14d43591face21dbd4d51b5c46fa3c17740ddc78 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3e692e0518b4f4679352d25102bd47cf3f85c592

commit 3e692e0518b4f4679352d25102bd47cf3f85c592
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Fri Oct 15 15:26:06 2010 -0400

    Implement fmal, some fma bugfixes

diff --git a/math/libm-test.inc b/math/libm-test.inc
index 1bec476..ceed18d 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -2787,8 +2787,24 @@ fma_test (void)
   TEST_fff_f (fma, minus_infty, plus_infty, plus_infty, nan_value, INVALID_EXCEPTION);
   TEST_fff_f (fma, plus_infty, minus_infty, plus_infty, nan_value, INVALID_EXCEPTION);
   TEST_fff_f (fma, minus_infty, minus_infty, minus_infty, nan_value, INVALID_EXCEPTION);
+  TEST_fff_f (fma, plus_infty, 3.5L, minus_infty, nan_value, INVALID_EXCEPTION);
+  TEST_fff_f (fma, minus_infty, -7.5L, minus_infty, nan_value, INVALID_EXCEPTION);
+  TEST_fff_f (fma, -13.5L, plus_infty, plus_infty, nan_value, INVALID_EXCEPTION);
+  TEST_fff_f (fma, minus_infty, 7.5L, plus_infty, nan_value, INVALID_EXCEPTION);
 
   TEST_fff_f (fma, 1.25L, 0.75L, 0.0625L, 1.0L);
+
+  FLOAT fltmax = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+			 LDBL_MAX, DBL_MAX, FLT_MAX);
+  TEST_fff_f (fma, -fltmax, -fltmax, minus_infty, minus_infty);
+  TEST_fff_f (fma, fltmax / 2, fltmax / 2, minus_infty, minus_infty);
+  TEST_fff_f (fma, -fltmax, fltmax, plus_infty, plus_infty);
+  TEST_fff_f (fma, fltmax / 2, -fltmax / 4, plus_infty, plus_infty);
+  TEST_fff_f (fma, plus_infty, 4, plus_infty, plus_infty);
+  TEST_fff_f (fma, 2, minus_infty, minus_infty, minus_infty);
+  TEST_fff_f (fma, minus_infty, minus_infty, plus_infty, plus_infty);
+  TEST_fff_f (fma, plus_infty, minus_infty, minus_infty, minus_infty);
+
 #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
   TEST_fff_f (fma, 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13);
   TEST_fff_f (fma, 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20);
@@ -2818,6 +2834,15 @@ fma_test (void)
   TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022);
   TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022);
   TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022);
+  TEST_fff_f (fma, -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+  TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
+  TEST_fff_f (fma, 0x9.fcp+2033L, -0x8.000e1f000ff800fp-3613L, -0xf.fffffffffffc0ffp-1579L, -0xd.fc119fb093ed092p-1577L);
+  TEST_fff_f (fma, 0xc.7fc000003ffffffp-1194L, 0x8.1e0003fffffffffp+15327L, -0x8.fffep+14072L, 0xc.ae9f164020effffp+14136L);
+  TEST_fff_f (fma, -0x8.0001fc000000003p+1798L, 0xcp-2230L, 0x8.f7e000000000007p-468L, -0xc.0002f9ffee10404p-429L);
+  TEST_fff_f (fma, 0xc.0000000000007ffp+10130L, -0x8.000000000000001p+4430L, 0xc.07000000001ffffp+14513L, -0xb.fffffffffffd7e4p+14563L);
+  TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L);
 #endif
 
   END (fma);
diff --git a/sysdeps/i386/fpu/s_fma.S b/sysdeps/i386/fpu/s_fma.S
deleted file mode 100644
index db4959c..0000000
--- a/sysdeps/i386/fpu/s_fma.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Compute (X * Y) + Z as ternary operation.
-   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fma)
-	fldl	4(%esp)		// x
-	fmull	12(%esp)	// x * y
-	fldl	20(%esp)	// z : x * y
-	faddp			// (x * y) + z
-	ret
-END(__fma)
-weak_alias (__fma, fma)
diff --git a/sysdeps/i386/fpu/s_fmaf.S b/sysdeps/i386/fpu/s_fmaf.S
deleted file mode 100644
index 5f87553..0000000
--- a/sysdeps/i386/fpu/s_fmaf.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Compute (X * Y) + Z as ternary operation.
-   Copyright (C) 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fmaf)
-	flds	4(%esp)		// x
-	fmuls	8(%esp)		// x * y
-	flds	12(%esp)	// z : x * y
-	faddp			// (x * y) + z
-	ret
-END(__fmaf)
-weak_alias (__fmaf, fmaf)
diff --git a/sysdeps/i386/fpu/s_fmal.S b/sysdeps/i386/fpu/s_fmal.S
deleted file mode 100644
index 1837f84..0000000
--- a/sysdeps/i386/fpu/s_fmal.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Compute (X * Y) + Z as ternary operation.
-   Copyright (C) 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-ENTRY(__fmal)
-	fldt	4(%esp)		// x
-	fldt	16(%esp)	// x : y
-	fmulp			// x * y
-	fldt	28(%esp)	// z : x * y
-	faddp			// (x * y) + z
-	ret
-END(__fmal)
-weak_alias (__fmal, fmal)
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 911682e..3b0bfd5 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -43,6 +43,12 @@ __fma (double x, double y, double z)
       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
 			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
     {
+      /* If z is Inf, but x and y are finite, the result should be
+	 z rather than NaN.  */
+      if (w.ieee.exponent == 0x7ff
+	  && u.ieee.exponent != 0x7ff
+          && v.ieee.exponent != 0x7ff)
+	return (z + x) + y;
       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
 	 or if x * y is less than half of DBL_DENORM_MIN,
 	 compute as x * y + z.  */
@@ -165,6 +171,8 @@ __fma (double x, double y, double z)
     }
   else
     {
+      if ((u.ieee.mantissa1 & 1) == 0)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
       v.d = a1 + u.d;
       int j = fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
similarity index 53%
copy from sysdeps/ieee754/dbl-64/s_fma.c
copy to sysdeps/ieee754/ldbl-128/s_fmal.c
index 911682e..9ec5ba9 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -27,92 +27,99 @@
    double rounding.  See a paper by Boldo and Melquiond:
    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
 
-double
-__fma (double x, double y, double z)
+long double
+__fmal (long double x, long double y, long double z)
 {
-  union ieee754_double u, v, w;
+  union ieee854_long_double u, v, w;
   int adjust = 0;
   u.d = x;
   v.d = y;
   w.d = z;
   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
-			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
-      || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
-      || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
-      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
+			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
+			   - LDBL_MANT_DIG, 0)
+      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
+      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
+      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
-			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
+			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
     {
+      /* If z is Inf, but x and y are finite, the result should be
+	 z rather than NaN.  */
+      if (w.ieee.exponent == 0x7fff
+	  && u.ieee.exponent != 0x7fff
+          && v.ieee.exponent != 0x7fff)
+	return (z + x) + y;
       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is less than half of DBL_DENORM_MIN,
+	 or if x * y is less than half of LDBL_DENORM_MIN,
 	 compute as x * y + z.  */
-      if (u.ieee.exponent == 0x7ff
-	  || v.ieee.exponent == 0x7ff
-	  || w.ieee.exponent == 0x7ff
+      if (u.ieee.exponent == 0x7fff
+	  || v.ieee.exponent == 0x7fff
+	  || w.ieee.exponent == 0x7fff
 	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7ff + IEEE754_DOUBLE_BIAS
+	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
 	  || u.ieee.exponent + v.ieee.exponent
-	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
+	     < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
 	return x * y + z;
       if (u.ieee.exponent + v.ieee.exponent
-	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
+	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
 	{
-	  /* Compute 1p-53 times smaller result and multiply
+	  /* Compute 1p-113 times smaller result and multiply
 	     at the end.  */
 	  if (u.ieee.exponent > v.ieee.exponent)
-	    u.ieee.exponent -= DBL_MANT_DIG;
+	    u.ieee.exponent -= LDBL_MANT_DIG;
 	  else
-	    v.ieee.exponent -= DBL_MANT_DIG;
+	    v.ieee.exponent -= LDBL_MANT_DIG;
 	  /* If x + y exponent is very large and z exponent is very small,
 	     it doesn't matter if we don't adjust it.  */
-	  if (w.ieee.exponent > DBL_MANT_DIG)
-	    w.ieee.exponent -= DBL_MANT_DIG;
+	  if (w.ieee.exponent > LDBL_MANT_DIG)
+	    w.ieee.exponent -= LDBL_MANT_DIG;
 	  adjust = 1;
 	}
-      else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 	{
 	  /* Similarly.
 	     If z exponent is very large and x and y exponents are
 	     very small, it doesn't matter if we don't adjust it.  */
 	  if (u.ieee.exponent > v.ieee.exponent)
 	    {
-	      if (u.ieee.exponent > DBL_MANT_DIG)
-		u.ieee.exponent -= DBL_MANT_DIG;
+	      if (u.ieee.exponent > LDBL_MANT_DIG)
+		u.ieee.exponent -= LDBL_MANT_DIG;
 	    }
-	  else if (v.ieee.exponent > DBL_MANT_DIG)
-	    v.ieee.exponent -= DBL_MANT_DIG;
-	  w.ieee.exponent -= DBL_MANT_DIG;
+	  else if (v.ieee.exponent > LDBL_MANT_DIG)
+	    v.ieee.exponent -= LDBL_MANT_DIG;
+	  w.ieee.exponent -= LDBL_MANT_DIG;
 	  adjust = 1;
 	}
-      else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 	{
-	  u.ieee.exponent -= DBL_MANT_DIG;
+	  u.ieee.exponent -= LDBL_MANT_DIG;
 	  if (v.ieee.exponent)
-	    v.ieee.exponent += DBL_MANT_DIG;
+	    v.ieee.exponent += LDBL_MANT_DIG;
 	  else
-	    v.d *= 0x1p53;
+	    v.d *= 0x1p113L;
 	}
-      else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 	{
-	  v.ieee.exponent -= DBL_MANT_DIG;
+	  v.ieee.exponent -= LDBL_MANT_DIG;
 	  if (u.ieee.exponent)
-	    u.ieee.exponent += DBL_MANT_DIG;
+	    u.ieee.exponent += LDBL_MANT_DIG;
 	  else
-	    u.d *= 0x1p53;
+	    u.d *= 0x1p113L;
 	}
       else /* if (u.ieee.exponent + v.ieee.exponent
-		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
+		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
 	{
 	  if (u.ieee.exponent > v.ieee.exponent)
-	    u.ieee.exponent += 2 * DBL_MANT_DIG;
+	    u.ieee.exponent += 2 * LDBL_MANT_DIG;
 	  else
-	    v.ieee.exponent += 2 * DBL_MANT_DIG;
-	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
+	    v.ieee.exponent += 2 * LDBL_MANT_DIG;
+	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
 	    {
 	      if (w.ieee.exponent)
-		w.ieee.exponent += 2 * DBL_MANT_DIG;
+		w.ieee.exponent += 2 * LDBL_MANT_DIG;
 	      else
-		w.d *= 0x1p106;
+		w.d *= 0x1p226L;
 	      adjust = -1;
 	    }
 	  /* Otherwise x * y should just affect inexact
@@ -123,23 +130,23 @@ __fma (double x, double y, double z)
       z = w.d;
     }
   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
-#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
-  double x1 = x * C;
-  double y1 = y * C;
-  double m1 = x * y;
+#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
+  long double x1 = x * C;
+  long double y1 = y * C;
+  long double m1 = x * y;
   x1 = (x - x1) + x1;
   y1 = (y - y1) + y1;
-  double x2 = x - x1;
-  double y2 = y - y1;
-  double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
+  long double x2 = x - x1;
+  long double y2 = y - y1;
+  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 
   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
-  double a1 = z + m1;
-  double t1 = a1 - z;
-  double t2 = a1 - t1;
+  long double a1 = z + m1;
+  long double t1 = a1 - z;
+  long double t2 = a1 - t1;
   t1 = m1 - t1;
   t2 = z - t2;
-  double a2 = t1 + t2;
+  long double a2 = t1 + t2;
 
   fenv_t env;
   feholdexcept (&env);
@@ -149,22 +156,24 @@ __fma (double x, double y, double z)
 
   if (__builtin_expect (adjust == 0, 1))
     {
-      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
+	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
       /* Result is a1 + u.d.  */
       return a1 + u.d;
     }
   else if (__builtin_expect (adjust > 0, 1))
     {
-      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
+	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
       /* Result is a1 + u.d, scaled up.  */
-      return (a1 + u.d) * 0x1p53;
+      return (a1 + u.d) * 0x1p113L;
     }
   else
     {
+      if ((u.ieee.mantissa3 & 1) == 0)
+	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
       v.d = a1 + u.d;
       int j = fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
@@ -174,46 +183,39 @@ __fma (double x, double y, double z)
       /* If a1 + u.d is exact, the only rounding happens during
 	 scaling down.  */
       if (j == 0)
-	return v.d * 0x1p-106;
+	return v.d * 0x1p-226L;
       /* If result rounded to zero is not subnormal, no double
 	 rounding will occur.  */
-      if (v.ieee.exponent > 106)
-	return (a1 + u.d) * 0x1p-106;
-      /* If v.d * 0x1p-106 with round to zero is a subnormal above
-	 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
-	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
+      if (v.ieee.exponent > 226)
+	return (a1 + u.d) * 0x1p-226L;
+      /* If v.d * 0x1p-226L with round to zero is a subnormal above
+	 or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa
+	 down just by 1 bit, which means v.ieee.mantissa3 |= j would
 	 change the round bit, not sticky or guard bit.
-	 v.d * 0x1p-106 never normalizes by shifting up,
+	 v.d * 0x1p-226L never normalizes by shifting up,
 	 so round bit plus sticky bit should be already enough
 	 for proper rounding.  */
-      if (v.ieee.exponent == 106)
+      if (v.ieee.exponent == 226)
 	{
-	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
-	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
+	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
+	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
 	     bit.  In round-to-nearest 001 rounds down like 00,
 	     011 rounds up, even though 01 rounds down (thus we need
 	     to adjust), 101 rounds down like 10 and 111 rounds up
 	     like 11.  */
-	  if ((v.ieee.mantissa1 & 3) == 1)
+	  if ((v.ieee.mantissa3 & 3) == 1)
 	    {
-	      v.d *= 0x1p-106;
+	      v.d *= 0x1p-226L;
 	      if (v.ieee.negative)
-		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
+		return v.d - 0x1p-16493L /* __LDBL_DENORM_MIN__ */;
 	      else
-		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
+		return v.d + 0x1p-16493L /* __LDBL_DENORM_MIN__ */;
 	    }
 	  else
-	    return v.d * 0x1p-106;
+	    return v.d * 0x1p-226L;
 	}
-      v.ieee.mantissa1 |= j;
-      return v.d * 0x1p-106;
+      v.ieee.mantissa3 |= j;
+      return v.d * 0x1p-226L;
     }
 }
-#ifndef __fma
-weak_alias (__fma, fma)
-#endif
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__fma, __fmal)
 weak_alias (__fmal, fmal)
-#endif
diff --git a/sysdeps/ieee754/ldbl-64-128/s_fmal.c b/sysdeps/ieee754/ldbl-64-128/s_fmal.c
new file mode 100644
index 0000000..218aa52
--- /dev/null
+++ b/sysdeps/ieee754/ldbl-64-128/s_fmal.c
@@ -0,0 +1,5 @@
+#include <math_ldbl_opt.h>
+#undef weak_alias
+#define weak_alias(n,a)
+#include <sysdeps/ieee754/ldbl-128/s_fmal.c>
+long_double_symbol (libm, __fmal, fmal);
diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c
index 90c6d1f..6c7e9d0 100644
--- a/sysdeps/ieee754/ldbl-96/s_fma.c
+++ b/sysdeps/ieee754/ldbl-96/s_fma.c
@@ -30,11 +30,20 @@
 double
 __fma (double x, double y, double z)
 {
+  if (__builtin_expect (isinf (z), 0))
+    {
+      /* If z is Inf, but x and y are finite, the result should be
+	 z rather than NaN.  */
+      if (finite (x) && finite (y))
+	return (z + x) + y;
+      return (x * y) + z;
+    }
+
   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
 #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
-  long double x1 = x * C;
-  long double y1 = y * C;
-  long double m1 = x * y;
+  long double x1 = (long double) x * C;
+  long double y1 = (long double) y * C;
+  long double m1 = (long double) x * y;
   x1 = (x - x1) + x1;
   y1 = (y - y1) + y1;
   long double x2 = x - x1;
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
similarity index 59%
copy from sysdeps/ieee754/dbl-64/s_fma.c
copy to sysdeps/ieee754/ldbl-96/s_fmal.c
index 911682e..340f025 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -27,92 +27,99 @@
    double rounding.  See a paper by Boldo and Melquiond:
    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
 
-double
-__fma (double x, double y, double z)
+long double
+__fmal (long double x, long double y, long double z)
 {
-  union ieee754_double u, v, w;
+  union ieee854_long_double u, v, w;
   int adjust = 0;
   u.d = x;
   v.d = y;
   w.d = z;
   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
-			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
-      || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
-      || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
-      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
+			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
+			   - LDBL_MANT_DIG, 0)
+      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
+      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
+      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
-			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
+			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
     {
+      /* If z is Inf, but x and y are finite, the result should be
+	 z rather than NaN.  */
+      if (w.ieee.exponent == 0x7fff
+	  && u.ieee.exponent != 0x7fff
+          && v.ieee.exponent != 0x7fff)
+	return (z + x) + y;
       /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
-	 or if x * y is less than half of DBL_DENORM_MIN,
+	 or if x * y is less than half of LDBL_DENORM_MIN,
 	 compute as x * y + z.  */
-      if (u.ieee.exponent == 0x7ff
-	  || v.ieee.exponent == 0x7ff
-	  || w.ieee.exponent == 0x7ff
+      if (u.ieee.exponent == 0x7fff
+	  || v.ieee.exponent == 0x7fff
+	  || w.ieee.exponent == 0x7fff
 	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7ff + IEEE754_DOUBLE_BIAS
+	     > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
 	  || u.ieee.exponent + v.ieee.exponent
-	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
+	     < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
 	return x * y + z;
       if (u.ieee.exponent + v.ieee.exponent
-	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
+	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
 	{
-	  /* Compute 1p-53 times smaller result and multiply
+	  /* Compute 1p-64 times smaller result and multiply
 	     at the end.  */
 	  if (u.ieee.exponent > v.ieee.exponent)
-	    u.ieee.exponent -= DBL_MANT_DIG;
+	    u.ieee.exponent -= LDBL_MANT_DIG;
 	  else
-	    v.ieee.exponent -= DBL_MANT_DIG;
+	    v.ieee.exponent -= LDBL_MANT_DIG;
 	  /* If x + y exponent is very large and z exponent is very small,
 	     it doesn't matter if we don't adjust it.  */
-	  if (w.ieee.exponent > DBL_MANT_DIG)
-	    w.ieee.exponent -= DBL_MANT_DIG;
+	  if (w.ieee.exponent > LDBL_MANT_DIG)
+	    w.ieee.exponent -= LDBL_MANT_DIG;
 	  adjust = 1;
 	}
-      else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 	{
 	  /* Similarly.
 	     If z exponent is very large and x and y exponents are
 	     very small, it doesn't matter if we don't adjust it.  */
 	  if (u.ieee.exponent > v.ieee.exponent)
 	    {
-	      if (u.ieee.exponent > DBL_MANT_DIG)
-		u.ieee.exponent -= DBL_MANT_DIG;
+	      if (u.ieee.exponent > LDBL_MANT_DIG)
+		u.ieee.exponent -= LDBL_MANT_DIG;
 	    }
-	  else if (v.ieee.exponent > DBL_MANT_DIG)
-	    v.ieee.exponent -= DBL_MANT_DIG;
-	  w.ieee.exponent -= DBL_MANT_DIG;
+	  else if (v.ieee.exponent > LDBL_MANT_DIG)
+	    v.ieee.exponent -= LDBL_MANT_DIG;
+	  w.ieee.exponent -= LDBL_MANT_DIG;
 	  adjust = 1;
 	}
-      else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 	{
-	  u.ieee.exponent -= DBL_MANT_DIG;
+	  u.ieee.exponent -= LDBL_MANT_DIG;
 	  if (v.ieee.exponent)
-	    v.ieee.exponent += DBL_MANT_DIG;
+	    v.ieee.exponent += LDBL_MANT_DIG;
 	  else
-	    v.d *= 0x1p53;
+	    v.d *= 0x1p64L;
 	}
-      else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
+      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
 	{
-	  v.ieee.exponent -= DBL_MANT_DIG;
+	  v.ieee.exponent -= LDBL_MANT_DIG;
 	  if (u.ieee.exponent)
-	    u.ieee.exponent += DBL_MANT_DIG;
+	    u.ieee.exponent += LDBL_MANT_DIG;
 	  else
-	    u.d *= 0x1p53;
+	    u.d *= 0x1p64L;
 	}
       else /* if (u.ieee.exponent + v.ieee.exponent
-		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
+		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
 	{
 	  if (u.ieee.exponent > v.ieee.exponent)
-	    u.ieee.exponent += 2 * DBL_MANT_DIG;
+	    u.ieee.exponent += 2 * LDBL_MANT_DIG;
 	  else
-	    v.ieee.exponent += 2 * DBL_MANT_DIG;
-	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
+	    v.ieee.exponent += 2 * LDBL_MANT_DIG;
+	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
 	    {
 	      if (w.ieee.exponent)
-		w.ieee.exponent += 2 * DBL_MANT_DIG;
+		w.ieee.exponent += 2 * LDBL_MANT_DIG;
 	      else
-		w.d *= 0x1p106;
+		w.d *= 0x1p128L;
 	      adjust = -1;
 	    }
 	  /* Otherwise x * y should just affect inexact
@@ -123,23 +130,23 @@ __fma (double x, double y, double z)
       z = w.d;
     }
   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
-#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
-  double x1 = x * C;
-  double y1 = y * C;
-  double m1 = x * y;
+#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
+  long double x1 = x * C;
+  long double y1 = y * C;
+  long double m1 = x * y;
   x1 = (x - x1) + x1;
   y1 = (y - y1) + y1;
-  double x2 = x - x1;
-  double y2 = y - y1;
-  double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
+  long double x2 = x - x1;
+  long double y2 = y - y1;
+  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
 
   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
-  double a1 = z + m1;
-  double t1 = a1 - z;
-  double t2 = a1 - t1;
+  long double a1 = z + m1;
+  long double t1 = a1 - z;
+  long double t2 = a1 - t1;
   t1 = m1 - t1;
   t2 = z - t2;
-  double a2 = t1 + t2;
+  long double a2 = t1 + t2;
 
   fenv_t env;
   feholdexcept (&env);
@@ -149,7 +156,7 @@ __fma (double x, double y, double z)
 
   if (__builtin_expect (adjust == 0, 1))
     {
-      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
       /* Result is a1 + u.d.  */
@@ -157,14 +164,16 @@ __fma (double x, double y, double z)
     }
   else if (__builtin_expect (adjust > 0, 1))
     {
-      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
       /* Result is a1 + u.d, scaled up.  */
-      return (a1 + u.d) * 0x1p53;
+      return (a1 + u.d) * 0x1p64L;
     }
   else
     {
+      if ((u.ieee.mantissa1 & 1) == 0)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
       v.d = a1 + u.d;
       int j = fetestexcept (FE_INEXACT) != 0;
       feupdateenv (&env);
@@ -174,19 +183,19 @@ __fma (double x, double y, double z)
       /* If a1 + u.d is exact, the only rounding happens during
 	 scaling down.  */
       if (j == 0)
-	return v.d * 0x1p-106;
+	return v.d * 0x1p-128L;
       /* If result rounded to zero is not subnormal, no double
 	 rounding will occur.  */
-      if (v.ieee.exponent > 106)
-	return (a1 + u.d) * 0x1p-106;
-      /* If v.d * 0x1p-106 with round to zero is a subnormal above
-	 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
+      if (v.ieee.exponent > 128)
+	return (a1 + u.d) * 0x1p-128L;
+      /* If v.d * 0x1p-128L with round to zero is a subnormal above
+	 or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa
 	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
 	 change the round bit, not sticky or guard bit.
-	 v.d * 0x1p-106 never normalizes by shifting up,
+	 v.d * 0x1p-128L never normalizes by shifting up,
 	 so round bit plus sticky bit should be already enough
 	 for proper rounding.  */
-      if (v.ieee.exponent == 106)
+      if (v.ieee.exponent == 128)
 	{
 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
@@ -196,24 +205,17 @@ __fma (double x, double y, double z)
 	     like 11.  */
 	  if ((v.ieee.mantissa1 & 3) == 1)
 	    {
-	      v.d *= 0x1p-106;
+	      v.d *= 0x1p-128L;
 	      if (v.ieee.negative)
-		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
+		return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
 	      else
-		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
+		return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
 	    }
 	  else
-	    return v.d * 0x1p-106;
+	    return v.d * 0x1p-128L;
 	}
       v.ieee.mantissa1 |= j;
-      return v.d * 0x1p-106;
+      return v.d * 0x1p-128L;
     }
 }
-#ifndef __fma
-weak_alias (__fma, fma)
-#endif
-
-#ifdef NO_LONG_DOUBLE
-strong_alias (__fma, __fmal)
 weak_alias (__fmal, fmal)
-#endif

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=f3f7372de1401b99f0a318ce09caf73e42d6f022

commit f3f7372de1401b99f0a318ce09caf73e42d6f022
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Fri Oct 15 15:25:14 2010 -0400

    Fix some more dbl-64/s_fma.c issue

diff --git a/ChangeLog b/ChangeLog
index a6af960..62ba40b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2010-10-14  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #3268]
+	* math/libm-test.inc (fma_test): Add some more tests.
+	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Handle underflows
+	correctly.
+
 2010-10-15  Andreas Schwab  <schwab@redhat.com>
 
 	* scripts/data/localplt-s390-linux-gnu.data: New file.
diff --git a/math/libm-test.inc b/math/libm-test.inc
index 34c4fa9..1bec476 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -2808,6 +2808,16 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011);
   TEST_fff_f (fma, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023);
   TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023);
+  TEST_fff_f (fma, 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0);
+  TEST_fff_f (fma, 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022);
+  TEST_fff_f (fma, 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022);
+  TEST_fff_f (fma, 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022);
+  TEST_fff_f (fma, 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022);
+  TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022);
+  TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022);
+  TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022);
+  TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022);
+  TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022);
 #endif
 
   END (fma);
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index ca7300c..911682e 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -39,15 +39,20 @@ __fma (double x, double y, double z)
 			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
       || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
       || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
-      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0))
+      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
+      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
+			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
     {
-      /* If x or y or z is Inf/NaN or if fma will certainly overflow,
+      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
+	 or if x * y is less than half of DBL_DENORM_MIN,
 	 compute as x * y + z.  */
       if (u.ieee.exponent == 0x7ff
 	  || v.ieee.exponent == 0x7ff
 	  || w.ieee.exponent == 0x7ff
 	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7ff + IEEE754_DOUBLE_BIAS)
+	     > 0x7ff + IEEE754_DOUBLE_BIAS
+	  || u.ieee.exponent + v.ieee.exponent
+	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
 	return x * y + z;
       if (u.ieee.exponent + v.ieee.exponent
 	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
@@ -87,7 +92,7 @@ __fma (double x, double y, double z)
 	  else
 	    v.d *= 0x1p53;
 	}
-      else
+      else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 	{
 	  v.ieee.exponent -= DBL_MANT_DIG;
 	  if (u.ieee.exponent)
@@ -95,6 +100,24 @@ __fma (double x, double y, double z)
 	  else
 	    u.d *= 0x1p53;
 	}
+      else /* if (u.ieee.exponent + v.ieee.exponent
+		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
+	{
+	  if (u.ieee.exponent > v.ieee.exponent)
+	    u.ieee.exponent += 2 * DBL_MANT_DIG;
+	  else
+	    v.ieee.exponent += 2 * DBL_MANT_DIG;
+	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
+	    {
+	      if (w.ieee.exponent)
+		w.ieee.exponent += 2 * DBL_MANT_DIG;
+	      else
+		w.d *= 0x1p106;
+	      adjust = -1;
+	    }
+	  /* Otherwise x * y should just affect inexact
+	     and nothing else.  */
+	}
       x = u.d;
       y = v.d;
       z = w.d;
@@ -123,18 +146,68 @@ __fma (double x, double y, double z)
   fesetround (FE_TOWARDZERO);
   /* Perform m2 + a2 addition with round to odd.  */
   u.d = a2 + m2;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
-  feupdateenv (&env);
-
-  /* Add that to a1.  */
-  a1 = a1 + u.d;
 
-  /* And adjust exponent if needed.  */
-  if (__builtin_expect (adjust, 0))
-    a1 *= 0x1p53;
-
-  return a1;
+  if (__builtin_expect (adjust == 0, 1))
+    {
+      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      feupdateenv (&env);
+      /* Result is a1 + u.d.  */
+      return a1 + u.d;
+    }
+  else if (__builtin_expect (adjust > 0, 1))
+    {
+      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      feupdateenv (&env);
+      /* Result is a1 + u.d, scaled up.  */
+      return (a1 + u.d) * 0x1p53;
+    }
+  else
+    {
+      v.d = a1 + u.d;
+      int j = fetestexcept (FE_INEXACT) != 0;
+      feupdateenv (&env);
+      /* Ensure the following computations are performed in default rounding
+	 mode instead of just reusing the round to zero computation.  */
+      asm volatile ("" : "=m" (u) : "m" (u));
+      /* If a1 + u.d is exact, the only rounding happens during
+	 scaling down.  */
+      if (j == 0)
+	return v.d * 0x1p-106;
+      /* If result rounded to zero is not subnormal, no double
+	 rounding will occur.  */
+      if (v.ieee.exponent > 106)
+	return (a1 + u.d) * 0x1p-106;
+      /* If v.d * 0x1p-106 with round to zero is a subnormal above
+	 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
+	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
+	 change the round bit, not sticky or guard bit.
+	 v.d * 0x1p-106 never normalizes by shifting up,
+	 so round bit plus sticky bit should be already enough
+	 for proper rounding.  */
+      if (v.ieee.exponent == 106)
+	{
+	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
+	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
+	     bit.  In round-to-nearest 001 rounds down like 00,
+	     011 rounds up, even though 01 rounds down (thus we need
+	     to adjust), 101 rounds down like 10 and 111 rounds up
+	     like 11.  */
+	  if ((v.ieee.mantissa1 & 3) == 1)
+	    {
+	      v.d *= 0x1p-106;
+	      if (v.ieee.negative)
+		return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
+	      else
+		return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
+	    }
+	  else
+	    return v.d * 0x1p-106;
+	}
+      v.ieee.mantissa1 |= j;
+      return v.d * 0x1p-106;
+    }
 }
 #ifndef __fma
 weak_alias (__fma, fma)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |    7 +
 math/libm-test.inc                                 |   35 +++
 sysdeps/i386/fpu/s_fma.S                           |   31 ---
 sysdeps/i386/fpu/s_fmaf.S                          |   31 ---
 sysdeps/i386/fpu/s_fmal.S                          |   32 ---
 sysdeps/ieee754/dbl-64/s_fma.c                     |  111 +++++++++--
 sysdeps/ieee754/ldbl-128/s_fmal.c                  |  221 ++++++++++++++++++++
 sysdeps/ieee754/{ldbl-opt => ldbl-64-128}/s_fmal.c |    2 +-
 sysdeps/ieee754/ldbl-96/s_fma.c                    |   15 +-
 sysdeps/ieee754/ldbl-96/s_fmal.c                   |  221 ++++++++++++++++++++
 10 files changed, 593 insertions(+), 113 deletions(-)
 delete mode 100644 sysdeps/i386/fpu/s_fma.S
 delete mode 100644 sysdeps/i386/fpu/s_fmaf.S
 delete mode 100644 sysdeps/i386/fpu/s_fmal.S
 create mode 100644 sysdeps/ieee754/ldbl-128/s_fmal.c
 copy sysdeps/ieee754/{ldbl-opt => ldbl-64-128}/s_fmal.c (70%)
 create mode 100644 sysdeps/ieee754/ldbl-96/s_fmal.c


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]