This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] log2 and log10 for wordsize-64


On 05/14/2012 05:31 PM, Adhemerval Zanella wrote:
These were in my backlog for a while. On x86_64 I observed an improvement of about 7%
for log10 and about 20% for log2. On PPC64 I observed an improvement of about
20% for log10 and about 30% for log2.

You copied the files from sysdeps/ieee754/dbl-64 and reformatted them, which makes the diff difficult to read (unless using -w). Could you send a separate patch that reformats the two files, please?


The patch itself is fine, just a few nits below.


Tested on ppc64 and x86_64.

---

2012-05-14 Adhemerval Zanella<azanella@linux.vnet.ibm.com>

	* sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c: New file.
	* sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c: New file.


diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c
new file mode 100644
index 0000000..2562a49
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_log10.c
@@ -0,0 +1,88 @@
+/* @(#)e_log10.c 5.1 93/09/24 */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/* __ieee754_log10(x)
+ * Return the base 10 logarithm of x
+ *
+ * Method :
+ * Let log10_2hi = leading 40 bits of log10(2) and
+ * log10_2lo = log10(2) - log10_2hi,
+ * ivln10 = 1/log(10) rounded.
+ * Then
+ * n = ilogb(x),
+ * if(n<0) n = n+1;
+ * x = scalbn(x,-n);
+ * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
+ *
+ * Note 1:
+ * To guarantee log10(10**n)=n, where 10**n is normal, the rounding
+ * mode must set to Round-to-Nearest.
+ * Note 2:
+ * [1/log(10)] rounded to 53 bits has error .198 ulps;
+ * log10 is monotonic at all binary break points.
+ *
+ * Special cases:
+ * log10(x) is NaN with signal if x< 0;
+ * log10(+INF) is +INF with no signal; log10(0) is -INF with signal;
+ * log10(NaN) is that NaN with no signal;
+ * log10(10**N) = N for N=0,1,...,22.
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following constants.
+ * The decimal values may be used, provided that the compiler will convert
+ * from decimal to binary accurately enough to produce the hexadecimal values
+ * shown.
+ */
+
+#include<math.h>
+#include<math_private.h>
+
+static const double two54 = 1.80143985094819840000e+16; /* 0x4350000000000000 */
+static const double ivln10 = 4.34294481903251816668e-01; /* 0x3FDBCB7B1526E50E */
+static const double log10_2hi = 3.01029995663611771306e-01; /* 0x3FD34413509F6000 */
+static const double log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF311F12B36 */
+
+static const double zero = 0.0;

zero seems to be unused in this code.
+/* Base-10 logarithm of x, decoded from the 64-bit image of the double.
+ *
+ * x is rewritten as 2^n * m, and the result is assembled as
+ *   n*log10_2hi + (n*log10_2lo + ivln10*__ieee754_log(m)).
+ * When the binary exponent is negative, n is bumped by one and the
+ * exponent of m is lowered by one to compensate (the 0x3ff - i term).
+ *
+ * Early returns:
+ *   x is +0 or -0                      ->  -two54 / (x - x)
+ *   x is negative                      ->  (x - x) / (x - x)
+ *   exponent field all ones (Inf/NaN)  ->  x + x
+ */
+double
+__ieee754_log10 (double x)
+{
+  double y, z;
+  int64_t i, hx;
+  int32_t k;
+
+  EXTRACT_WORDS64 (hx, x);	/* NOTE(review): presumably loads the raw bits of x into hx; the macro is defined outside this patch */
+
+  k = 0;
+  if (hx<  INT64_C(0x0010000000000000))
+    {				/* x<  2**-1022: zero, negative or subnormal  */
+      if (__builtin_expect ((hx&  UINT64_C(0x7fffffffffffffff)) == 0, 0))
+	return -two54 / (x - x);	/* log(+-0)=-inf */
+      if (__builtin_expect (hx<  0, 0))
+	return (x - x) / (x - x);	/* log(-#) = NaN */
+      k -= 54;			/* compensates for the two54 scaling on the next line */
+      x *= two54;		/* subnormal number, scale up x */
+      EXTRACT_WORDS64 (hx, x);	/* re-read the bits of the scaled value */
+    }
+  /* Exponent field all ones: x is +Inf or NaN, and x + x is returned.  */
+  if (__builtin_expect (hx>= UINT64_C(0x7ff0000000000000), 0))
+    return x + x;
+  k += (hx>>  52) - 1023;	/* exponent field minus the 1023 bias */
+  i = ((uint64_t) k&  UINT64_C(0x8000000000000000))>>  63;	/* i = 1 when k is negative, else 0 */
+  hx = (hx&  UINT64_C(0x000fffffffffffff)) | ((0x3ff - i)<<  52);	/* keep the mantissa, force the exponent field to 0x3ff - i */
+  y = (double)(k+i);		/* the n of the header comment */
+  INSERT_WORDS64 (x, hx);	/* NOTE(review): presumably stores the bits in hx back into x; macro defined elsewhere */
+  z = y * log10_2lo + ivln10 * __ieee754_log (x);	/* low-order part of the sum first */
+  return z + y * log10_2hi;
+}
+
+strong_alias (__ieee754_log10, __log10_finite)	/* NOTE(review): presumably makes __log10_finite a second name for this function; macro defined elsewhere */
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c
new file mode 100644
index 0000000..d532f1f
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c
@@ -0,0 +1,129 @@
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/* __ieee754_log2(x)
+ * Return the logarithm to base 2 of x
+ *
+ * Method :
+ *   1. Argument Reduction: find k and f such that
+ *			x = 2^k * (1+f),
+ *	   where  sqrt(2)/2<  1+f<  sqrt(2) .
+ *
+ *   2. Approximation of log(1+f).
+ *	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+ *		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+ *		 = 2s + s*R
+ *      We use a special Remez algorithm on [0,0.1716] to generate
+ *	a polynomial of degree 14 to approximate R. The maximum error
+ *	of this polynomial approximation is bounded by 2**-58.45. In
+ *	other words,
+ *			2      4      6      8      10      12      14
+ *	    R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
+ *	(the values of Lg1 to Lg7 are listed in the program)
+ *	and
+ *	    |      2          14          |     -58.45
+ *	    | Lg1*s +...+Lg7*s    -  R(z) |<= 2
+ *	    |                             |
+ *	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+ *	In order to guarantee error in log below 1ulp, we compute log
+ *	by
+ *		log(1+f) = f - s*(f - R)	(if f is not too large)
+ *		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
+ *
+ *	3. Finally,  log2(x) = k + log(1+f)/ln2
+ *			     = k+(f-(hfsq-(s*(hfsq+R))))*two1ln2
+ *
+ * Special cases:
+ *	log2(x) is NaN with signal if x<  0 (including -INF) ;
+ *	log2(+INF) is +INF; log2(0) is -INF with signal;
+ *	log2(NaN) is that NaN with no signal.
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following
+ * constants. The decimal values may be used, provided that the
+ * compiler will convert from decimal to binary accurately enough
+ * to produce the hexadecimal values shown.
+ */
+
+#include<math.h>
+#include<math_private.h>
+
+static const double ln2 = 0.69314718055994530942;
+static const double two1ln2 = 1.4426950408889636;

Please add as comment: two1ln2 = 1/ln2


+static const double two54 = 1.80143985094819840000e+16;	/* 4350000000000000 */
+static const double Lg1 = 6.666666666666735130e-01;	/* 3FE5555555555593 */
+static const double Lg2 = 3.999999999940941908e-01;	/* 3FD999999997FA04 */
+static const double Lg3 = 2.857142874366239149e-01;	/* 3FD2492494229359 */
+static const double Lg4 = 2.222219843214978396e-01;	/* 3FCC71C51D8E78AF */
+static const double Lg5 = 1.818357216161805012e-01;	/* 3FC7466496CB03DE */
+static const double Lg6 = 1.531383769920937332e-01;	/* 3FC39A09D078C69F */
+static const double Lg7 = 1.479819860511658591e-01;	/* 3FC2F112DF3E5244 */
+
+static const double zero = 0.0;

+/* Base-2 logarithm of x, decoded from the 64-bit image of the double.
+ *
+ * x is reduced to 2^k * (1+f).  log(1+f) is evaluated either with a
+ * short series (when the mantissa test below succeeds) or with the
+ * Lg1..Lg7 polynomial in s = f/(2+f); the result is multiplied by
+ * two1ln2 and combined with k.
+ *
+ * Early returns:
+ *   x is +0 or -0                      ->  -two54 / (x - x)
+ *   x is negative                      ->  (x - x) / (x - x)
+ *   exponent field all ones (Inf/NaN)  ->  x + x
+ */
+double
+__ieee754_log2 (double x)
+{
+  double hfsq, f, s, z, R, w, t1, t2, dk;
+  int64_t hx, i, j;
+  int32_t k;
+
+  EXTRACT_WORDS64 (hx, x);	/* NOTE(review): presumably loads the raw bits of x into hx; the macro is defined outside this patch */
+
+  k = 0;
+  if (hx<  INT64_C(0x0010000000000000))
+    {				/* x<  2**-1022: zero, negative or subnormal  */
+      if (__builtin_expect ((hx&  UINT64_C(0x7fffffffffffffff)) == 0, 0))
+	return -two54 / (x - x);	/* log(+-0)=-inf */
+      if (__builtin_expect (hx<  0, 0))
+	return (x - x) / (x - x);	/* log(-#) = NaN */
+      k -= 54;			/* compensates for the two54 scaling on the next line */
+      x *= two54;		/* subnormal number, scale up x */
+      EXTRACT_WORDS64 (hx, x);	/* re-read the bits of the scaled value */
+    }
+  if (__builtin_expect (hx>= UINT64_C(0x7ff0000000000000), 0))	/* exponent field all ones: +Inf or NaN */
+    return x + x;
+  k += (hx>>  52) - 1023;	/* exponent field minus the 1023 bias */
+  hx&= UINT64_C(0x000fffffffffffff);	/* keep only the mantissa bits */
+  i = (hx + UINT64_C(0x95f6400000000))&  UINT64_C(0x10000000000000);	/* bit 52 is set when the addition carries out of the mantissa; NOTE(review): threshold presumably matches the sqrt(2) bound of the header comment -- confirm */
+  /* normalize x or x/2 */
+  INSERT_WORDS64 (x, hx | (i ^ UINT64_C(0x3ff0000000000000)));	/* exponent field becomes 0x3ff, or 0x3fe when i is set */
+  k += (i>>  52);		/* add one to k when x/2 was chosen */
+  dk = (double) k;
+  f = x - 1.0;
+  if ((UINT64_C(0x000fffffffffffff)&  (2 + hx))<  3)
+    {				/* |f|<  2**-20 (NOTE(review): the mask spans all 52 mantissa bits, so this test looks far stricter than 2**-20 -- confirm) */
+      if (f == zero)
+	return dk;
+      R = f * f * (0.5 - 0.33333333333333333 * f);	/* f*f/2 - f*f*f/3 */
+      return dk - (R - f) * two1ln2;
+    }
+  s = f / (2.0 + f);
+  z = s * s;
+  i = hx - UINT64_C(0x6147a00000000);	/* negative once stored in i when hx is below this constant */
+  w = z * z;
+  j = UINT64_C(0x6b85100000000) - hx;	/* negative once stored in j when hx is above this constant */
+  t1 = w * (Lg2 + w * (Lg4 + w * Lg6));	/* even-indexed Lg terms */
+  t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));	/* odd-indexed Lg terms */
+  i |= j;			/* stays positive only if neither difference is negative */
+  R = t2 + t1;
+  if (i>  0)
+    {				/* the hfsq form, marked "better accuracy" in the header comment */
+      hfsq = 0.5 * f * f;
+      return dk - ((hfsq - (s * (hfsq + R))) - f) * two1ln2;
+    }
+  else
+    {				/* the f - s*(f - R) form from the header comment */
+      return dk - ((s * (f - R)) - f) * two1ln2;
+    }
+}
+
+strong_alias (__ieee754_log2, __log2_finite)	/* NOTE(review): presumably makes __log2_finite a second name for this function; macro defined elsewhere */

Andreas
-- 
Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
GPG fingerprint = 93A3 365E CE47 B889 DF7F FED1 389A 563C C272 A126


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]