This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7

From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
To: "Ryan S. Arnold" <ryan dot arnold at gmail dot com>
Cc: "GNU C. Library" <libc-alpha at sourceware dot org>
Date: Wed, 09 May 2012 13:26:55 -0300
Subject: Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7
References: <4FA911C7.4000301@linux.vnet.ibm.com> <CAAKybw-_Vg+u+5i9Z_jbJH1N3gSsmbn0stJ3+-N9xpWADJpVTg@mail.gmail.com>
On 05/08/2012 12:45 PM, Ryan S. Arnold wrote:
> No colon necessary after "New file:".  It should be "New file.
> Optimize logb for POWER7".
> I prefer that this mention that it #includes the powerpc32/power7/ .c
> file.  Something like:
>
> "New file.  Use powerpc32/power7/logb[fl].c via #include.
The ChangeLog below now follows these suggestions.


> Roland has indicated that he no longer wants "Contributed by"
> statements.. The git log will serve as attribution.  This applies to
> all files in this patchset.
I already removed the "Contributed by" lines.


> I would like if there were a comment indicating what's going on, i.e.,
> /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
>
>
> Once again, please comment the special case with an indication of
> what's being returned (if it is a special value indicated by the
> spec).  Please indicate the special value in the comment rather than
> just a magic number that represents it.  This applies to all files in
> this patchset.
I added comments explaining why each of the return expressions is used.

>> +  return ret == -0.0 ? 0.0 : ret;
> Is this faster than an builtin abs call?
For POWER7, the compiler will optimize this, and replacing '0.0' with '__builtin_abs (ret)' would
just trade an 'xxlor' instruction for an 'xsabsdp'. It is not faster; both have similar latency.


> Should be "Since operations are done with double we don't need additional..."
Fixed.

---

2012-05-09  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c: New file. Optimized
	logb for POWER7.
	* sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c: New file. Optimized
	logbf for POWER7.
	* sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c: New file. Optimized
	logbl for POWER7.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c: New file. Use
	powerpc32/power7/fpu/s_logb.c via #include.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c: New file. Use
	powerpc32/power7/fpu/s_logbf.c via #include.
	* sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c: New file. Use
	powerpc32/power7/fpu/s_logbl.c via #include.


diff --git a/math/libm-test.inc b/math/libm-test.inc
index 542131d..5a38dbf 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -5376,6 +5376,22 @@ logb_test (void)
   TEST_f_f (logb, 1024, 10);
   TEST_f_f (logb, -2000, 10);
 
+  TEST_f_f (logb, 0x0.1p-127, -131);
+  TEST_f_f (logb, 0x0.01p-127, -135);
+  TEST_f_f (logb, 0x0.011p-127, -135);
+#ifndef TEST_FLOAT
+  TEST_f_f (logb, 0x0.8p-1022, -1023);
+  TEST_f_f (logb, 0x0.1p-1022, -1026);
+  TEST_f_f (logb, 0x0.00111p-1022, -1034);
+  TEST_f_f (logb, 0x0.00001p-1022, -1042);
+  TEST_f_f (logb, 0x0.000011p-1022, -1042);
+  TEST_f_f (logb, 0x0.0000000000001p-1022, -1074);
+#endif
+#if defined TEST_LDOUBLE && LDBL_MIN_EXP - LDBL_MANT_DIG <= -16400
+  TEST_f_f (logb, 0x1p-16400L, -16400);
+  TEST_f_f (logb, 0x.00000000001p-16382L, -16426);
+#endif
+
   END (logb);
 }
 
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
new file mode 100644
index 0000000..a5d21f4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
@@ -0,0 +1,75 @@
+/* logb(). PowerPC/POWER7 version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "math_private.h"
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+   instructions over FP values.  */
+
+static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
+static const double two10m1   = -1023.0;		/* -2**10 + 1  */
+
+/* FP mask to extract the exponent.  */
+static const union {
+  unsigned long long mask;
+  double d;
+} mask = { 0x7ff0000000000000ULL };
+
+double
+__logb (double x)
+{
+  double ret;
+
+  if (__builtin_expect (x == 0.0, 0))
+    /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
+    return -1.0 / __builtin_fabs (x);
+
+  /* ret = x & 0x7ff0000000000000, then converted from integer to FP.  */
+  asm (
+    "xxland %x0,%x1,%x2\n"
+    "fcfid  %0,%0"
+    : "=f" (ret)
+    : "f" (x), "f" (mask.d));
+  /* ret = (ret >> 52) - 1023.0;  */
+  ret = (ret * two1div52) + two10m1;
+  if (__builtin_expect (ret > -two10m1, 0))
+    /* Multiplication is used to set logb (+-INF) = INF.  */
+    return (x * x);
+  else if (__builtin_expect (ret == two10m1, 0))
+    {
+      /* POSIX specifies that denormal numbers are treated as though
+         they were normalized.  Clear the sign bit first so a negative
+         input does not make the leading-zero count come out as 0.  */
+      int32_t lx, ix;
+      int ma;
+
+      EXTRACT_WORDS (ix, lx, x);
+      ix &= 0x7fffffff;
+      ma = (ix == 0) ? __builtin_clz (lx) + 32 : __builtin_clz (ix);
+      return -1022.0 + (double)(11 - ma);
+    }
+  /* Test to avoid logb_downward (0.0) == -0.0.  */
+  return ret == -0.0 ? 0.0 : ret;
+}
+
+weak_alias (__logb, logb)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__logb, __logbl)
+weak_alias (__logb, logbl)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c
new file mode 100644
index 0000000..33254b5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c
@@ -0,0 +1,60 @@
+/* logbf(). PowerPC/POWER7 version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "math_private.h"
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+   instructions over FP values.  */
+
+static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
+static const double two10m1   = -1023.0;		/* -2**10 + 1  */
+static const double two7m1    = -127.0;			/* -2**7 + 1  */
+
+/* FP mask to extract the exponent.  */
+static const union {
+  unsigned long long mask;
+  double d;
+} mask = { 0x7ff0000000000000ULL };
+
+float
+__logbf (float x)
+{
+  /* VSX operations are all done internally as double.  */
+  double ret;
+
+  if (__builtin_expect (x == 0.0, 0))
+    /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
+    return -1.0 / __builtin_fabsf (x);
+
+  /* ret = x & 0x7ff0000000000000, since x is handled as a double.  */
+  asm (
+    "xxland %x0,%x1,%x2\n"
+    "fcfid  %0,%0"
+    : "=f"(ret)
+    : "f" (x), "f" (mask.d));
+  /* ret = (ret >> 52) - 1023.0, since ret is double.  */
+  ret = (ret * two1div52) + two10m1;
+  if (__builtin_expect (ret > -two7m1, 0))
+    /* Multiplication is used to set logb (+-INF) = INF.  */
+    return (x * x);
+  /* Since operations are done with double we don't need
+     additional tests for subnormal numbers.
+     The test is to avoid logb_downward (0.0) == -0.0.  */
+  return ret == -0.0 ? 0.0 : ret;
+}
+weak_alias (__logbf, logbf)
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
new file mode 100644
index 0000000..5dd0cda
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
@@ -0,0 +1,72 @@
+/* logbl(). PowerPC/POWER7 version.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <math_private.h>
+#include <math_ldbl_opt.h>
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+   instructions over FP values.  */
+
+static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
+static const double two10m1   = -1023.0;		/* -2**10 + 1  */
+
+/* FP mask to extract the exponent.  */
+static const union {
+  unsigned long long mask;
+  double d;
+} mask = { 0x7ff0000000000000ULL };
+
+long double
+__logbl (long double x)
+{
+  double xh, xl;
+  double ret;
+
+  if (__builtin_expect (x == 0.0L, 0))
+    /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
+    return -1.0L / __builtin_fabsl (x);
+
+  ldbl_unpack (x, &xh, &xl);
+  /* ret = xh & 0x7ff0000000000000, then converted from integer to FP.  */
+  asm (
+    "xxland %x0,%x1,%x2\n"
+    "fcfid  %0,%0"
+    : "=f" (ret)
+    : "f" (xh), "f" (mask.d));
+  /* ret = (ret >> 52) - 1023.0;  */
+  ret = (ret * two1div52) + two10m1;
+  if (__builtin_expect (ret > -two10m1, 0))
+    /* Multiplication is used to set logb (+-INF) = INF.  */
+    return (xh * xh);
+  else if (__builtin_expect (ret == two10m1, 0))
+    {
+      /* xh is subnormal.  Mask off the sign bit so a negative input does
+         not count 0 leading zeros (assumes xh != 0 whenever x != 0).  */
+      int64_t lx, hx;
+      int ma;
+
+      GET_LDOUBLE_WORDS64 (hx, lx, x);
+      ma = __builtin_clzll (hx & 0x7fffffffffffffffLL);
+      return -1022.0 + (double)(11 - ma);
+    }
+  /* Test to avoid logb_downward (0.0) == -0.0.  */
+  return ret == -0.0 ? 0.0 : ret;
+}
+
+long_double_symbol (libm, __logbl, logbl);
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
new file mode 100644
index 0000000..ff3a9e0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c
new file mode 100644
index 0000000..e79a28f
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c
new file mode 100644
index 0000000..463e411
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c>
-- 
1.6.0.2



-- 
Adhemerval Zanella Netto
  Software Engineer
  Linux Technology Center Brazil
  Toolchain / GLIBC on Power Architecture
  azanella@linux.vnet.ibm.com / azanella@br.ibm.com
  +55 61 8642-9890
Follow-Ups:
- Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7
  - From: Ryan S. Arnold
- Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7
  - From: Andreas Schwab
References:
- [PATCH] PowerPC - logb[f|l] optimization for POWER7
  - From: Adhemerval Zanella
- Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7
  - From: Ryan S. Arnold
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]