This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Reduce the maximum precision for exp and log
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: libc-alpha at sourceware dot org
- Date: Thu, 28 Feb 2013 21:34:27 +0530
- Subject: [PATCH] Reduce the maximum precision for exp and log
Hi,
The worst case precision required for exp() and log() are known[1] to
be 158 bits and 118 bits respectively. This patch uses this knowledge
to limit the mp computations of both these functions to minimum
precision larger than these values. No regressions were seen in the
testsuite due to this. I could not find a slow path-invoking input
for log, so I don't have measurements for it. For exp however, I have
stats on x86_64:
Without patch:
exp: ITERS:100000: TOTAL:3715573856ns, MAX:48685.347000ns, MIN:36759.609000ns, 26.913743/ms
With patch:
exp: ITERS:100000: TOTAL:209115362ns, MAX:5950.700000ns, MIN:1963.101000ns, 478.204944/ms
So with the patch it is about 17.77 times faster.
Siddhesh
[1] http://perso.ens-lyon.fr/jean-michel.muller/TMDworstcases.pdf
* sysdeps/ieee754/dbl-64/e_log.c (__ieee754_log): Set
precision as 5 for mp fallback.
* sysdeps/ieee754/dbl-64/slowexp.c (__slowexp): Set maximum
precision as 7 for mp fallback.
diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c
index 762639b..f129374 100644
--- a/sysdeps/ieee754/dbl-64/e_log.c
+++ b/sysdeps/ieee754/dbl-64/e_log.c
@@ -53,21 +53,19 @@ void __mplog(mp_no *, mp_no *, int);
double
SECTION
__ieee754_log(double x) {
-#define M 4
- static const int pr[M]={8,10,18,32};
int i,j,n,ux,dx,p;
#if 0
int k;
#endif
double dbl_n,u,p0,q,r0,w,nln2a,luai,lubi,lvaj,lvbj,
- sij,ssij,ttij,A,B,B0,y,y1,y2,polI,polII,sa,sb,
+ sij,ssij,ttij,A,B,B0,y,polI,polII,sa,sb,
t1,t2,t7,t8,t,ra,rb,ww,
a0,aa0,s1,s2,ss2,s3,ss3,a1,aa1,a,aa,b,bb,c;
#ifndef DLA_FMS
double t3,t4,t5,t6;
#endif
number num;
- mp_no mpx,mpy,mpy1,mpy2,mperr;
+ mp_no mpx,mpy;
#include "ulog.tbl"
#include "ulog.h"
@@ -199,18 +197,23 @@ __ieee754_log(double x) {
/* Final stages. Use multi-precision arithmetic. */
- stage_n:
-
- for (i=0; i<M; i++) {
- p = pr[i];
- __dbl_mp(x,&mpx,p); __dbl_mp(y,&mpy,p);
- __mplog(&mpx,&mpy,p);
- __dbl_mp(e[i].d,&mperr,p);
- __add(&mpy,&mperr,&mpy1,p); __sub(&mpy,&mperr,&mpy2,p);
- __mp_dbl(&mpy1,&y1,p); __mp_dbl(&mpy2,&y2,p);
- if (y1==y2) return y1;
- }
- return y1;
+ stage_n:
+
+ /* The paper titled "Worst Cases for Correct Rounding of the Elementary
+ Functions in Double Precision" by Vincent Lefevre and Jean-Michel Muller
+ found via exhaustive search that 113 bits of
+ precision is sufficient for the worst case input to the natural log
+ function.
+
+ http://perso.ens-lyon.fr/jean-michel.muller/TMDworstcases.pdf */
+
+ p = 5;
+ __dbl_mp(x,&mpx,p);
+ __dbl_mp(y,&mpy,p);
+ __mplog(&mpx,&mpy,p);
+ __mp_dbl(&mpy,&y,p);
+
+ return y;
}
#ifndef __ieee754_log
strong_alias (__ieee754_log, __log_finite)
diff --git a/sysdeps/ieee754/dbl-64/slowexp.c b/sysdeps/ieee754/dbl-64/slowexp.c
index 8f353f6..e1bea93 100644
--- a/sysdeps/ieee754/dbl-64/slowexp.c
+++ b/sysdeps/ieee754/dbl-64/slowexp.c
@@ -44,31 +44,30 @@ SECTION
__slowexp (double x)
{
#ifndef USE_LONG_DOUBLE_FOR_MP
- double w, z, res, eps = 3.0e-26;
int p;
- mp_no mpx, mpy, mpz, mpw, mpeps, mpcor;
+ mp_no mpx, mpy;
+ double res;
+
+ /* The paper titled "Worst Cases for Correct Rounding of the Elementary
+ Functions in Double Precision" by Vincent Lefevre and Jean-Michel Muller
+ found via exhaustive search that for values of |X| >= 2^-30, 113 bits of
+ precision is sufficient while for those below, 158 bits of precision is
+ sufficient. For numbers below 2^-54 we can just return 1 when rounding to
+ nearest.
+
+ http://perso.ens-lyon.fr/jean-michel.muller/TMDworstcases.pdf */
+
+ if (ABS (x) >= 0x1.0p-30)
+ p = 5;
+ else if (ABS (x) < 0x1.0p-54)
+ return 1.0;
+ else
+ p = 7;
- /* Use the multiple precision __MPEXP function to compute the exponential
- First at 144 bits and if it is not accurate enough, at 768 bits. */
- p = 6;
__dbl_mp (x, &mpx, p);
__mpexp (&mpx, &mpy, p);
- __dbl_mp (eps, &mpeps, p);
- __mul (&mpeps, &mpy, &mpcor, p);
- __add (&mpy, &mpcor, &mpw, p);
- __sub (&mpy, &mpcor, &mpz, p);
- __mp_dbl (&mpw, &w, p);
- __mp_dbl (&mpz, &z, p);
- if (w == z)
- return w;
- else
- {
- p = 32;
- __dbl_mp (x, &mpx, p);
- __mpexp (&mpx, &mpy, p);
- __mp_dbl (&mpy, &res, p);
- return res;
- }
+ __mp_dbl (&mpy, &res, p);
+ return res;
#else
return (double) __ieee754_expl((long double)x);
#endif
diff --git a/sysdeps/ieee754/dbl-64/ulog.h b/sysdeps/ieee754/dbl-64/ulog.h
index eec1eef..08cdff1 100644
--- a/sysdeps/ieee754/dbl-64/ulog.h
+++ b/sysdeps/ieee754/dbl-64/ulog.h
@@ -91,10 +91,6 @@
/**/ e2 = {{0x3c1138ce, 0x00000000} }, /* 2.334e-19 */
/**/ e3 = {{0x3aa1565d, 0x00000000} }, /* 2.801e-26 */
/**/ e4 = {{0x39809d88, 0x00000000} }, /* 1.024e-31 */
-/**/ e[M] ={{{0x37da223a, 0x00000000} }, /* 1.2e-39 */
-/**/ {{0x35c851c4, 0x00000000} }, /* 1.3e-49 */
-/**/ {{0x2ab85e51, 0x00000000} }, /* 6.8e-103 */
-/**/ {{0x17383827, 0x00000000} }},/* 8.1e-197 */
/**/ two54 = {{0x43500000, 0x00000000} }, /* 2**54 */
/**/ u03 = {{0x3f9eb851, 0xeb851eb8} }; /* 0.03 */
@@ -163,10 +159,6 @@
/**/ e2 = {{0x00000000, 0x3c1138ce} }, /* 2.334e-19 */
/**/ e3 = {{0x00000000, 0x3aa1565d} }, /* 2.801e-26 */
/**/ e4 = {{0x00000000, 0x39809d88} }, /* 1.024e-31 */
-/**/ e[M] ={{{0x00000000, 0x37da223a} }, /* 1.2e-39 */
-/**/ {{0x00000000, 0x35c851c4} }, /* 1.3e-49 */
-/**/ {{0x00000000, 0x2ab85e51} }, /* 6.8e-103 */
-/**/ {{0x00000000, 0x17383827} }},/* 8.1e-197 */
/**/ two54 = {{0x00000000, 0x43500000} }, /* 2**54 */
/**/ u03 = {{0xeb851eb8, 0x3f9eb851} }; /* 0.03 */