This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH] log2 and log10 for wordsize-64

From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
To: Andreas Jaeger <aj at suse dot com>
Cc: "GNU C. Library" <libc-alpha at sourceware dot org>
Date: Mon, 14 May 2012 17:00:09 -0300
Subject: Re: [PATCH] log2 and log10 for wordsize-64
References: <4FB1254D.3060307@linux.vnet.ibm.com> <4FB14C1D.2060308@suse.com>
On 05/14/2012 03:17 PM, Andreas Jaeger wrote:

> You copied from sysdeps/ieee754/dbl-64 and reformatted the file which 
> makes a diff difficult (unless using -w). Could you send a separate 
> patch that reformats the two files, please?
Sure, I'm attaching a patch with both e_log2.c and e_log10.c reformatted as I did for the
wordsize-64 version. I believe now a compare will be easier.

> zero seems to be unused in this code.
Indeed, I removed it. Also removed it from original source.

> Please add as comment: two1ln2 = 1/ln2
Yeah, this remind that using an multiplication to replace a division on original code is
not the best way to keep accuracy. I get rid of this and now the patch is using an
division as original code. It shows now less performance, but it is still a gain.

Thanks for the review.

---

These were in my backlog for while. On x86_64 I observed an improvement of about 7%
for log10 and about 9% for log2. And on PPC64 I observed an improvement of about
20% for log10 and about 9% for log2.

Tested on ppc64 and x86_64.

---

2012-05-14  Adhemerval Zanella  <azanella@linux.vnet.ibm.com> 

	* sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c: New file.
	* sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c: New file.
	* sysdeps/ieee754/dbl-64/e_log10.c: Fixing indents.
	* sysdeps/ieee754/dbl-64/e_log2.c: Likewise.


diff --git a/sysdeps/ieee754/dbl-64/e_log10.c b/sysdeps/ieee754/dbl-64/e_log10.c
index 9fce937..ab5069e 100644
--- a/sysdeps/ieee754/dbl-64/e_log10.c
+++ b/sysdeps/ieee754/dbl-64/e_log10.c
@@ -46,39 +46,40 @@
 #include <math.h>
 #include <math_private.h>
 
-static const double
-two54      =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-ivln10     =  4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */
-log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
-log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
-
-static const double zero   =  0.0;
+static const double two54 = 1.80143985094819840000e+16;		/* 0x43500000, 0x00000000 */
+static const double ivln10 = 4.34294481903251816668e-01;	/* 0x3FDBCB7B, 0x1526E50E */
+static const double log10_2hi = 3.01029995663611771306e-01;	/* 0x3FD34413, 0x509F6000 */
+static const double log10_2lo = 3.69423907715893078616e-13;	/* 0x3D59FEF3, 0x11F12B36 */
 
 double
-__ieee754_log10(double x)
+__ieee754_log10 (double x)
 {
-	double y,z;
-	int32_t i,k,hx;
-	u_int32_t lx;
+  double y, z;
+  int32_t i, k, hx;
+  u_int32_t lx;
 
-	EXTRACT_WORDS(hx,lx,x);
+  EXTRACT_WORDS (hx, lx, x);
 
-	k=0;
-	if (hx < 0x00100000) {			/* x < 2**-1022  */
-	    if (__builtin_expect(((hx&0x7fffffff)|lx)==0, 0))
-		return -two54/(x-x);		/* log(+-0)=-inf */
-	    if (__builtin_expect(hx<0, 0))
-		return (x-x)/(x-x);	/* log(-#) = NaN */
-	    k -= 54; x *= two54; /* subnormal number, scale up x */
-	    GET_HIGH_WORD(hx,x);
-	}
-	if (__builtin_expect(hx >= 0x7ff00000, 0)) return x+x;
-	k += (hx>>20)-1023;
-	i  = ((u_int32_t)k&0x80000000)>>31;
-	hx = (hx&0x000fffff)|((0x3ff-i)<<20);
-	y  = (double)(k+i);
-	SET_HIGH_WORD(x,hx);
-	z  = y*log10_2lo + ivln10*__ieee754_log(x);
-	return  z+y*log10_2hi;
+  k = 0;
+  if (hx < 0x00100000)
+    {				/* x < 2**-1022  */
+      if (__builtin_expect (((hx & 0x7fffffff) | lx) == 0, 0))
+	return -two54 / (x - x);	/* log(+-0)=-inf */
+      if (__builtin_expect (hx < 0, 0))
+	return (x - x) / (x - x);	/* log(-#) = NaN */
+      k -= 54;
+      x *= two54;		/* subnormal number, scale up x */
+      GET_HIGH_WORD (hx, x);
+    }
+  if (__builtin_expect (hx >= 0x7ff00000, 0))
+    return x + x;
+  k += (hx >> 20) - 1023;
+  i = ((u_int32_t) k & 0x80000000) >> 31;
+  hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
+  y = (double) (k + i);
+  SET_HIGH_WORD (x, hx);
+  z = y * log10_2lo + ivln10 * __ieee754_log (x);
+  return z + y * log10_2hi;
 }
+
 strong_alias (__ieee754_log10, __log10_finite)
diff --git a/sysdeps/ieee754/dbl-64/e_log2.c b/sysdeps/ieee754/dbl-64/e_log2.c
index 6891ee2..4d5cab0 100644
--- a/sysdeps/ieee754/dbl-64/e_log2.c
+++ b/sysdeps/ieee754/dbl-64/e_log2.c
@@ -57,64 +57,72 @@
 #include <math.h>
 #include <math_private.h>
 
-static const double
-ln2 = 0.69314718055994530942,
-two54   =  1.80143985094819840000e+16,  /* 43500000 00000000 */
-Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+static const double ln2 = 0.69314718055994530942;
+static const double two54 = 1.80143985094819840000e+16;	/* 43500000 00000000 */
+static const double Lg1 = 6.666666666666735130e-01;	/* 3FE55555 55555593 */
+static const double Lg2 = 3.999999999940941908e-01;	/* 3FD99999 9997FA04 */
+static const double Lg3 = 2.857142874366239149e-01;	/* 3FD24924 94229359 */
+static const double Lg4 = 2.222219843214978396e-01;	/* 3FCC71C5 1D8E78AF */
+static const double Lg5 = 1.818357216161805012e-01;	/* 3FC74664 96CB03DE */
+static const double Lg6 = 1.531383769920937332e-01;	/* 3FC39A09 D078C69F */
+static const double Lg7 = 1.479819860511658591e-01;	/* 3FC2F112 DF3E5244 */
 
-static const double zero   =  0.0;
+static const double zero = 0.0;
 
 double
-__ieee754_log2(double x)
+__ieee754_log2 (double x)
 {
-	double hfsq,f,s,z,R,w,t1,t2,dk;
-	int32_t k,hx,i,j;
-	u_int32_t lx;
+  double hfsq, f, s, z, R, w, t1, t2, dk;
+  int32_t k, hx, i, j;
+  u_int32_t lx;
 
-	EXTRACT_WORDS(hx,lx,x);
+  EXTRACT_WORDS (hx, lx, x);
 
-	k=0;
-	if (hx < 0x00100000) {			/* x < 2**-1022  */
-	    if (__builtin_expect(((hx&0x7fffffff)|lx)==0, 0))
-		return -two54/(x-x);		/* log(+-0)=-inf */
-	    if (__builtin_expect(hx<0, 0))
-		return (x-x)/(x-x);	/* log(-#) = NaN */
-	    k -= 54; x *= two54; /* subnormal number, scale up x */
-	    GET_HIGH_WORD(hx,x);
-	}
-	if (__builtin_expect(hx >= 0x7ff00000, 0)) return x+x;
-	k += (hx>>20)-1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	SET_HIGH_WORD(x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
-	k += (i>>20);
-	dk = (double) k;
-	f = x-1.0;
-	if((0x000fffff&(2+hx))<3) {	/* |f| < 2**-20 */
-	    if(f==zero) return dk;
-	    R = f*f*(0.5-0.33333333333333333*f);
-	    return dk-(R-f)/ln2;
-	}
-	s = f/(2.0+f);
-	z = s*s;
-	i = hx-0x6147a;
-	w = z*z;
-	j = 0x6b851-hx;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0) {
-	    hfsq=0.5*f*f;
-	    return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
-	} else {
-	    return dk-((s*(f-R))-f)/ln2;
-	}
+  k = 0;
+  if (hx < 0x00100000)
+    {				/* x < 2**-1022  */
+      if (__builtin_expect (((hx & 0x7fffffff) | lx) == 0, 0))
+	return -two54 / (x - x);	/* log(+-0)=-inf */
+      if (__builtin_expect (hx < 0, 0))
+	return (x - x) / (x - x);	/* log(-#) = NaN */
+      k -= 54;
+      x *= two54;		/* subnormal number, scale up x */
+      GET_HIGH_WORD (hx, x);
+    }
+  if (__builtin_expect (hx >= 0x7ff00000, 0))
+    return x + x;
+  k += (hx >> 20) - 1023;
+  hx &= 0x000fffff;
+  i = (hx + 0x95f64) & 0x100000;
+  SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000));	/* normalize x or x/2 */
+  k += (i >> 20);
+  dk = (double) k;
+  f = x - 1.0;
+  if ((0x000fffff & (2 + hx)) < 3)
+    {				/* |f| < 2**-20 */
+      if (f == zero)
+	return dk;
+      R = f * f * (0.5 - 0.33333333333333333 * f);
+      return dk - (R - f) / ln2;
+    }
+  s = f / (2.0 + f);
+  z = s * s;
+  i = hx - 0x6147a;
+  w = z * z;
+  j = 0x6b851 - hx;
+  t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
+  t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
+  i |= j;
+  R = t2 + t1;
+  if (i > 0)
+    {
+      hfsq = 0.5 * f * f;
+      return dk - ((hfsq - (s * (hfsq + R))) - f) / ln2;
+    }
+  else
+    {
+      return dk - ((s * (f - R)) - f) / ln2;
+    }
 }
+
 strong_alias (__ieee754_log2, __log2_finite)
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c
new file mode 100644
index 0000000..488a0ef
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c
@@ -0,0 +1,86 @@
+/* @(#)e_log10.c 5.1 93/09/24 */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/* __ieee754_log10(x)
+ * Return the base 10 logarithm of x
+ *
+ * Method :
+ *	Let log10_2hi = leading 40 bits of log10(2) and
+ *	    log10_2lo = log10(2) - log10_2hi,
+ *	    ivln10   = 1/log(10) rounded.
+ *	Then
+ *		n = ilogb(x),
+ *		if(n<0)  n = n+1;
+ *		x = scalbn(x,-n);
+ *		log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
+ *
+ * Note 1:
+ *	To guarantee log10(10**n)=n, where 10**n is normal, the rounding
+ *	mode must set to Round-to-Nearest.
+ * Note 2:
+ *	[1/log(10)] rounded to 53 bits has error  .198   ulps;
+ *	log10 is monotonic at all binary break points.
+ *
+ * Special cases:
+ *	log10(x) is NaN with signal if x < 0;
+ *	log10(+INF) is +INF with no signal; log10(0) is -INF with signal;
+ *	log10(NaN) is that NaN with no signal;
+ *	log10(10**N) = N  for N=0,1,...,22.
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following constants.
+ * The decimal values may be used, provided that the compiler will convert
+ * from decimal to binary accurately enough to produce the hexadecimal values
+ * shown.
+ */
+
+#include <math.h>
+#include <math_private.h>
+
+static const double two54 = 1.80143985094819840000e+16;		/* 0x4350000000000000 */
+static const double ivln10 = 4.34294481903251816668e-01;	/* 0x3FDBCB7B1526E50E */
+static const double log10_2hi = 3.01029995663611771306e-01;	/* 0x3FD34413509F6000 */
+static const double log10_2lo = 3.69423907715893078616e-13;	/* 0x3D59FEF311F12B36 */
+
+double
+__ieee754_log10 (double x)
+{
+  double y, z;
+  int64_t i, hx;
+  int32_t k;
+
+  EXTRACT_WORDS64 (hx, x);
+
+  k = 0;
+  if (hx < INT64_C(0x0010000000000000))
+    {				/* x < 2**-1022  */
+      if (__builtin_expect ((hx & UINT64_C(0x7fffffffffffffff)) == 0, 0))
+	return -two54 / (x - x);	/* log(+-0)=-inf */
+      if (__builtin_expect (hx < 0, 0))
+	return (x - x) / (x - x);	/* log(-#) = NaN */
+      k -= 54;
+      x *= two54;		/* subnormal number, scale up x */
+      EXTRACT_WORDS64 (hx, x);
+    }
+  /* scale up resulted in a NaN number  */
+  if (__builtin_expect (hx >= UINT64_C(0x7ff0000000000000), 0))
+    return x + x;
+  k += (hx >> 52) - 1023;
+  i = ((uint64_t) k & UINT64_C(0x8000000000000000)) >> 63;
+  hx = (hx & UINT64_C(0x000fffffffffffff)) | ((0x3ff - i) << 52);
+  y = (double) (k + i);
+  INSERT_WORDS64 (x, hx);
+  z = y * log10_2lo + ivln10 * __ieee754_log (x);
+  return z + y * log10_2hi;
+}
+
+strong_alias (__ieee754_log10, __log10_finite)
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c
new file mode 100644
index 0000000..6dc7b7d
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c
@@ -0,0 +1,128 @@
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/* __ieee754_log2(x)
+ * Return the logarithm to base 2 of x
+ *
+ * Method :
+ *   1. Argument Reduction: find k and f such that
+ *			x = 2^k * (1+f),
+ *	   where  sqrt(2)/2 < 1+f < sqrt(2) .
+ *
+ *   2. Approximation of log(1+f).
+ *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+ *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+ *		 = 2s + s*R
+ *      We use a special Reme algorithm on [0,0.1716] to generate
+ *	a polynomial of degree 14 to approximate R The maximum error
+ *	of this polynomial approximation is bounded by 2**-58.45. In
+ *	other words,
+ *			2      4      6      8      10      12      14
+ *	    R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
+ *	(the values of Lg1 to Lg7 are listed in the program)
+ *	and
+ *	    |      2          14          |     -58.45
+ *	    | Lg1*s +...+Lg7*s    -  R(z) | <= 2
+ *	    |                             |
+ *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+ *	In order to guarantee error in log below 1ulp, we compute log
+ *	by
+ *		log(1+f) = f - s*(f - R)	(if f is not too large)
+ *		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
+ *
+ *	3. Finally,  log(x) = k + log(1+f).
+ *			    = k+(f-(hfsq-(s*(hfsq+R))))
+ *
+ * Special cases:
+ *	log2(x) is NaN with signal if x < 0 (including -INF) ;
+ *	log2(+INF) is +INF; log(0) is -INF with signal;
+ *	log2(NaN) is that NaN with no signal.
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following
+ * constants. The decimal values may be used, provided that the
+ * compiler will convert from decimal to binary accurately enough
+ * to produce the hexadecimal values shown.
+ */
+
+#include <math.h>
+#include <math_private.h>
+
+static const double ln2 = 0.69314718055994530942;
+static const double two54 = 1.80143985094819840000e+16;	/* 4350000000000000 */
+static const double Lg1 = 6.666666666666735130e-01;	/* 3FE5555555555593 */
+static const double Lg2 = 3.999999999940941908e-01;	/* 3FD999999997FA04 */
+static const double Lg3 = 2.857142874366239149e-01;	/* 3FD2492494229359 */
+static const double Lg4 = 2.222219843214978396e-01;	/* 3FCC71C51D8E78AF */
+static const double Lg5 = 1.818357216161805012e-01;	/* 3FC7466496CB03DE */
+static const double Lg6 = 1.531383769920937332e-01;	/* 3FC39A09D078C69F */
+static const double Lg7 = 1.479819860511658591e-01;	/* 3FC2F112DF3E5244 */
+
+static const double zero = 0.0;
+
+double
+__ieee754_log2 (double x)
+{
+  double hfsq, f, s, z, R, w, t1, t2, dk;
+  int64_t hx, i, j;
+  int32_t k;
+
+  EXTRACT_WORDS64 (hx, x);
+
+  k = 0;
+  if (hx < INT64_C(0x0010000000000000))
+    {				/* x < 2**-1022  */
+      if (__builtin_expect ((hx & UINT64_C(0x7fffffffffffffff)) == 0, 0))
+	return -two54 / (x - x);	/* log(+-0)=-inf */
+      if (__builtin_expect (hx < 0, 0))
+	return (x - x) / (x - x);	/* log(-#) = NaN */
+      k -= 54;
+      x *= two54;		/* subnormal number, scale up x */
+      EXTRACT_WORDS64 (hx, x);
+    }
+  if (__builtin_expect (hx >= UINT64_C(0x7ff0000000000000), 0))
+    return x + x;
+  k += (hx >> 52) - 1023;
+  hx &= UINT64_C(0x000fffffffffffff);
+  i = (hx + UINT64_C(0x95f6400000000)) & UINT64_C(0x10000000000000);
+  /* normalize x or x/2 */
+  INSERT_WORDS64 (x, hx | (i ^ UINT64_C(0x3ff0000000000000)));
+  k += (i >> 52);
+  dk = (double) k;
+  f = x - 1.0;
+  if ((UINT64_C(0x000fffffffffffff) & (2 + hx)) < 3)
+    {				/* |f| < 2**-20 */
+      if (f == zero)
+	return dk;
+      R = f * f * (0.5 - 0.33333333333333333 * f);
+      return dk - (R - f) / ln2;
+    }
+  s = f / (2.0 + f);
+  z = s * s;
+  i = hx - UINT64_C(0x6147a00000000);
+  w = z * z;
+  j = UINT64_C(0x6b85100000000) - hx;
+  t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
+  t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
+  i |= j;
+  R = t2 + t1;
+  if (i > 0)
+    {
+      hfsq = 0.5 * f * f;
+      return dk - ((hfsq - (s * (hfsq + R))) - f) / ln2;
+    }
+  else
+    {
+      return dk - ((s * (f - R)) - f) / ln2;
+    }
+}
+
+strong_alias (__ieee754_log2, __log2_finite)
-- 
1.7.5.4


-- 
Adhemerval Zanella Netto
  Software Engineer
  Linux Technology Center Brazil
  Toolchain / GLIBC on Power Architecture
  azanella@linux.vnet.ibm.com / azanella@br.ibm.com
  +55 61 8642-9890
Follow-Ups:
- Re: [PATCH] log2 and log10 for wordsize-64
  - From: Andreas Jaeger
References:
- [PATCH] log2 and log10 for wordsize-64
  - From: Adhemerval Zanella
- Re: [PATCH] log2 and log10 for wordsize-64
  - From: Andreas Jaeger
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]