This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Fix spurious underflows from pow with results close to 1 (bug 14811)

From: "Joseph S. Myers" <joseph at codesourcery dot com>
To: <libc-alpha at sourceware dot org>
Date: Wed, 7 Nov 2012 00:37:26 +0000
Subject: Fix spurious underflows from pow with results close to 1 (bug 14811)
Bug 14811 is spurious underflow in pow implementations for results
very close to 1, arising from underflow in the y * log(x) calculation
because y is close to 0.  This patch fixes these underflows by
saturating such values (to +/- 0x1p-32 for float, for example, since
the (1 - 0x1p-25)^0x1p32 is about e^-128 < 2^-149, so any
representable float to the power 0x1p-32 gives a result less than
0.5ulp from 1), similar to various such saturation logic for large
exponents.

Tested x86_64 (multi-arch both enabled and disabled) and x86.

The issue doesn't appear for the i386 float and double implementations
because the internal calculations are done in x87 registers so have
extra exponent range avoiding these underflows when the inputs are
float or double.  I don't know if the issue appears for ldbl-128,
ldbl-128ibm, ia64 or m68k - if it does (as shown by the tests added to
the testsuite) then similar saturation logic should be straightforward
to add (but each implementation is substantially different - unlike
many functions, this isn't a case of substantially the same code used
in multiple places with different constants).

2012-11-07  Joseph Myers  <joseph@codesourcery.com>

	[BZ #14811]
	* sysdeps/i386/fpu/e_powl.S (pm79): New object.
	(__ieee754_powl): Saturate nonzero exponents with absolute value
	below 0x1p-79 to +/- 0x1p-79.
	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Saturate nonzero
	exponents with absolute value below 0x1p-64 to +/- 0x1p-64.
	* sysdeps/ieee754/flt-32/e_powf.c (__ieee754_powf): Saturate
	nonzero exponents with absolute value below 0x1p-32 to +/-
	0x1p-32.
	* sysdeps/x86_64/fpu/e_powl.S (pm79): New object.
	(__ieee754_powl): Saturate nonzero exponents with absolute value
	below 0x1p-79 to +/- 0x1p-79.
	* math/libm-test.inc (pow_test): Add more tests.

diff --git a/math/libm-test.inc b/math/libm-test.inc
index a52ce6a..74488e7 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -7743,10 +7743,12 @@ pow_test (void)
   TEST_ff_f (pow, plus_infty, 1e-7L, plus_infty);
   TEST_ff_f (pow, plus_infty, 1, plus_infty);
   TEST_ff_f (pow, plus_infty, 1e7L, plus_infty);
+  TEST_ff_f (pow, plus_infty, min_subnorm_value, plus_infty);
 
   TEST_ff_f (pow, plus_infty, -1e-7L, 0);
   TEST_ff_f (pow, plus_infty, -1, 0);
   TEST_ff_f (pow, plus_infty, -1e7L, 0);
+  TEST_ff_f (pow, plus_infty, -min_subnorm_value, 0);
 
   TEST_ff_f (pow, minus_infty, 1, minus_infty);
   TEST_ff_f (pow, minus_infty, 11, minus_infty);
@@ -7759,6 +7761,7 @@ pow_test (void)
   TEST_ff_f (pow, minus_infty, 1.1L, plus_infty);
   TEST_ff_f (pow, minus_infty, 11.1L, plus_infty);
   TEST_ff_f (pow, minus_infty, 1001.1L, plus_infty);
+  TEST_ff_f (pow, minus_infty, min_subnorm_value, plus_infty);
 
   TEST_ff_f (pow, minus_infty, -1, minus_zero);
   TEST_ff_f (pow, minus_infty, -11, minus_zero);
@@ -7771,6 +7774,7 @@ pow_test (void)
   TEST_ff_f (pow, minus_infty, -1.1L, 0);
   TEST_ff_f (pow, minus_infty, -11.1L, 0);
   TEST_ff_f (pow, minus_infty, -1001.1L, 0);
+  TEST_ff_f (pow, minus_infty, -min_subnorm_value, 0);
 #endif
 
   TEST_ff_f (pow, nan_value, nan_value, nan_value);
@@ -7793,6 +7797,8 @@ pow_test (void)
   TEST_ff_f (pow, nan_value, minus_infty, nan_value);
   TEST_ff_f (pow, nan_value, 2.5, nan_value);
   TEST_ff_f (pow, nan_value, -2.5, nan_value);
+  TEST_ff_f (pow, nan_value, min_subnorm_value, nan_value);
+  TEST_ff_f (pow, nan_value, -min_subnorm_value, nan_value);
 
   TEST_ff_f (pow, 1, plus_infty, 1);
   TEST_ff_f (pow, -1, plus_infty, 1);
@@ -7806,6 +7812,8 @@ pow_test (void)
   TEST_ff_f (pow, 1, 0x1p63L, 1);
   TEST_ff_f (pow, 1, 0x1p64L, 1);
   TEST_ff_f (pow, 1, 0x1p72L, 1);
+  TEST_ff_f (pow, 1, min_subnorm_value, 1);
+  TEST_ff_f (pow, 1, -min_subnorm_value, 1);
 
   /* pow (x, +-0) == 1.  */
   TEST_ff_f (pow, plus_infty, 0, 1);
@@ -7825,6 +7833,10 @@ pow_test (void)
   TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
   TEST_ff_f (pow, -10.1L, 1.1L, nan_value, INVALID_EXCEPTION);
   TEST_ff_f (pow, -10.1L, -1.1L, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
 
   errno = 0;
   TEST_ff_f (pow, 0, -1, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
@@ -7910,6 +7922,9 @@ pow_test (void)
   TEST_ff_f (pow, 0, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
+  TEST_ff_f (pow, 0, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
   TEST_ff_f (pow, 0, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
@@ -7925,6 +7940,9 @@ pow_test (void)
   TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
+  TEST_ff_f (pow, minus_zero, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
   TEST_ff_f (pow, minus_zero, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
   errno = 0;
@@ -8114,12 +8132,14 @@ pow_test (void)
   TEST_ff_f (pow, 0.0, 0x1p24, 0.0);
   TEST_ff_f (pow, 0.0, 0x1p127, 0.0);
   TEST_ff_f (pow, 0.0, max_value, 0.0);
+  TEST_ff_f (pow, 0.0, min_subnorm_value, 0.0);
 
   /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
   TEST_ff_f (pow, minus_zero, 4, 0.0);
   TEST_ff_f (pow, minus_zero, 0x1p24, 0.0);
   TEST_ff_f (pow, minus_zero, 0x1p127, 0.0);
   TEST_ff_f (pow, minus_zero, max_value, 0.0);
+  TEST_ff_f (pow, minus_zero, min_subnorm_value, 0.0);
 
   TEST_ff_f (pow, 16, 0.25L, 2);
   TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
@@ -8407,6 +8427,15 @@ pow_test (void)
   TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
 #endif
 
+  TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, -min_subnorm_value, 1.0L);
+
   TEST_ff_f (pow, 2.0L, -100000.0L, plus_zero, UNDERFLOW_EXCEPTION);
 
   END (pow);
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S
index 933418c..ac4842c 100644
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
 	.type p78,@object
 p78:	.byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
 	ASM_SIZE_DIRECTIVE(p78)
+	.type pm79,@object
+pm79:	.byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+	ASM_SIZE_DIRECTIVE(pm79)
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -120,9 +123,25 @@ ENTRY(__ieee754_powl)
 	fucomp	%st(1)		// y : x
 	fnstsw
 	sahf
-	jne	3f
+	je	9f
 
-	/* OK, we have an integer value for y.  */
+	// If y has absolute value at most 0x1p-79, then any finite
+	// nonzero x will result in 1.  Saturate y to those bounds to
+	// avoid underflow in the calculation of y*log2(x).
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(pm79)	// y : x
+	fnstsw
+	sahf
+	jnc	3f
+	fstp	%st(0)		// pop y
+	fldl	MO(pm79)	// 0x1p-79 : x
+	testb	$2, %dl
+	jnz	3f		// y > 0
+	fchs			// -0x1p-79 : x
+	jmp	3f
+
+9:	/* OK, we have an integer value for y.  */
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 3fd5e65..5131718 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -90,6 +90,10 @@ __ieee754_pow(double x, double y) {
 
     SET_RESTORE_ROUND (FE_TONEAREST);
 
+    /* Avoid internal underflow for tiny y.  The exact value of y does
+       not matter if |y| <= 2**-64.  */
+    if (ABS (y) < 0x1p-64)
+      y = y < 0 ? -0x1p-64 : 0x1p-64;
     z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
     t = y*134217729.0;
     y1 = t - (t-y);
diff --git a/sysdeps/ieee754/flt-32/e_powf.c b/sysdeps/ieee754/flt-32/e_powf.c
index 4306947..12c408f 100644
--- a/sysdeps/ieee754/flt-32/e_powf.c
+++ b/sysdeps/ieee754/flt-32/e_powf.c
@@ -141,6 +141,10 @@ __ieee754_powf(float x, float y)
 	    t2 = v-(t1-u);
 	} else {
 	    float s2,s_h,s_l,t_h,t_l;
+	    /* Avoid internal underflow for tiny y.  The exact value
+	       of y does not matter if |y| <= 2**-32.  */
+	    if (iy < 0x2f800000)
+	      SET_FLOAT_WORD (y, (hy & 0x80000000) | 0x2f800000);
 	    n = 0;
 	/* take care subnormal number */
 	    if(ix<0x00800000)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 4fe23c0..1b37185 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:	.byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
 	.type p78,@object
 p78:	.byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
 	ASM_SIZE_DIRECTIVE(p78)
+	.type pm79,@object
+pm79:	.byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+	ASM_SIZE_DIRECTIVE(pm79)
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -110,9 +113,25 @@ ENTRY(__ieee754_powl)
 	fistpll	-8(%rsp)	// y : x
 	fildll	-8(%rsp)	// int(y) : y : x
 	fucomip	%st(1),%st	// y : x
-	jne	3f
+	je	9f
+
+	// If y has absolute value at most 0x1p-79, then any finite
+	// nonzero x will result in 1.  Saturate y to those bounds to
+	// avoid underflow in the calculation of y*log2(x).
+	fldl	MO(pm79)	// 0x1p-79 : y : x
+	fld	%st(1)		// y : 0x1p-79 : y : x
+	fabs			// |y| : 0x1p-79 : y : x
+	fcomip	%st(1), %st	// 0x1p-79 : y : x
+	fstp	%st(0)		// y : x
+	jnc	3f
+	fstp	%st(0)		// pop y
+	fldl	MO(pm79)	// 0x1p-79 : x
+	testb	$2, %dl
+	jnz	3f		// y > 0
+	fchs			// -0x1p-79 : x
+	jmp	3f
 
-	/* OK, we have an integer value for y.  */
+9:	/* OK, we have an integer value for y.  */
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx

-- 
Joseph S. Myers
joseph@codesourcery.com
Follow-Ups:
- Re: Fix spurious underflows from pow with results close to 1 (bug 14811)
  - From: Andreas Jaeger
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]