This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PowerPC - logb[f|l] optimization for POWER7


On Tuesday, May 08, 2012 14:29:59 Adhemerval Zanella wrote:
> This patch provides optimized logb (1.2x on PPC32 and 2.5x on PPC64),
> logbf (1.1x on PPC32 and 2.2x on PPC64), and logbl (1.3x on PPC32 and
> 50% on PPC64).

Ryan, I'd like you to review this as well.

I assume you have tested this on PPPC32 and PPC64 and run the testsuite 
with no regressions on both? Please always state this.

The patch looks ok to me after cleaning up a few nits I noticed below,
Andreas

> ---
> 
> 2012-05-08  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
> 
> 	* sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c: New file: optimized
> 	logb for POWER7.
> 	* sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c: New file: optimized
> 	logbf for POWER7.
> 	* sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c: New file: optimized
> 	logbl for POWER7.
> 	* sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c: New file: wrapper for
> 	the optimized logb for PPC64.
> 	* sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c: New file: wrapper for
> 	the optimized logbf for PPC64.
> 	* sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c: New file: wrapper for
> 	the optimized logbl for PPC64.
> 
> diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
> b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c new file mode 100644
> index 0000000..5f57c49
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
> @@ -0,0 +1,74 @@
> +/* logb(). PowerPC/POWER7 version.
> +   Copyright (C) 2012 Free Software Foundation, Inc.
> +   Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include "math_private.h"
> +
> +/* This implementation avoid FP to INT conversions by using VSX bitwise
> +   instructions over FP values.  */
> +
> +static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
> +static const double two10m1   = -1023.0;		/* 2**10 -1  */
> +
> +/* FP mask to extract the exponent  */

Please add "." use two spaces afterwards.

> +static const union {
> +  unsigned long long mask;
> +  double d;
> +} mask = { 0x7ff0000000000000ULL };
> +
> +double
> +__logb (double x)
> +{
> +  double ret;
> +
> +  if (__builtin_expect(x == 0.0, 0))
A space before the opening "("

> +    return -1.0 / __builtin_fabs (x);
> +
> +  /* ret = x & 0x7ff0000000000000;  */
> +  asm (
> +    "xxland %x0,%x1,%x2\n"
> +    "fcfid  %0,%0"
> +    : "=f" (ret)
> +    : "f" (x), "f" (mask.d));
> +  /* ret = (ret >> 52) - 1023.0;  */
> +  ret = (ret * two1div52) + two10m1;
> +  if (__builtin_expect (ret > -two10m1, 0))
> +    return (x * x);
> +  else if (__builtin_expect (ret == two10m1, 0))
> +    {
> +      /* POSIX specifies that denormal number is treated as
> +         though it were normalized.  */

The grammar looks wrong, I suggest "...that denormal numbers are treated as 
if they were normalized."

> +      int32_t lx, ix;
> +      int m1, m2, ma;
> +
> +      EXTRACT_WORDS (ix , lx, x);
> +      m1 = (ix == 0) ? 0 : __builtin_clz (ix);
> +      m2 = (lx == 0) ? 0 : __builtin_clz (lx);
> +      ma = (m1 == 0) ? m2 + 32 : m1;
> +      return -1022.0 + (double)(11 - ma);
> +    }
> +  /* Test to avoid logb_downward (0.0) == -0.0  */
Add ".  " at the end.

> +  return ret == -0.0 ? 0.0 : ret;
> +}
> +
> +weak_alias (__logb, logb)
> +
> +#ifdef NO_LONG_DOUBLE
> +strong_alias (__logb, __logbl)
> +weak_alias (__logb, logbl)
> +#endif
> diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c
> b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c new file mode 100644
> index 0000000..eb276f8
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c
> @@ -0,0 +1,59 @@
> +/* logbf(). PowerPC/POWER7 version.
> +   Copyright (C) 2012 Free Software Foundation, Inc.
> +   Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include "math_private.h"
> +
> +/* This implementation avoid FP to INT conversions by using VSX bitwise
> +   instructions over FP values.  */
> +
> +static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
> +static const double two10m1   = -1023.0;		/* -2**10 + 1  */
> +static const double two7m1    = -127.0;			/* -2**7 + 1  */
> +
> +/* FP mask to extract the exponent  */

As above.

> +static const union {
> +  unsigned long long mask;
> +  double d;
> +} mask = { 0x7ff0000000000000ULL };
> +
> +float
> +__logbf (float x)
> +{
> +  /* VSX operation are all done internally as double  */

Add ".  " at the end.
> +  double ret;
> +
> +  if (__builtin_expect (x == 0.0, 0))
> +    return -1.0 / __builtin_fabsf (x);
> +
> +  /* ret = x & 0x7f800000;  */
> +  asm (
> +    "xxland %x0,%x1,%x2\n"
> +    "fcfid  %0,%0"
> +    : "=f"(ret)
> +    : "f" (x), "f" (mask.d));
> +  /* ret = (ret >> 52) - 1023.0, since ret is double  */
Add ".  " at the end.

> +  ret = (ret * two1div52) + two10m1;
> +  if (__builtin_expect (ret > -two7m1, 0))
> +    return (x * x);
> +  /* Since operations are done with double, not need to
> +     additional tests for subnormal numbers.
> +     The test is to avoid logb_downward (0.0) == -0.0  */
Add ".  " at the end.

> +  return ret == -0.0 ? 0.0 : ret;
> +}
> +weak_alias (__logbf, logbf)
> diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
> b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c new file mode 100644
> index 0000000..e4899f1
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
> @@ -0,0 +1,71 @@
> +/* logbl(). PowerPC/POWER7 version.
> +   Copyright (C) 2012 Free Software Foundation, Inc.
> +   Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <math.h>
> +#include <math_private.h>
> +#include <math_ldbl_opt.h>
> +
> +/* This implementation avoid FP to INT conversions by using VSX bitwise
> +   instructions over FP values.  */
> +
> +static const double two1div52 = 2.220446049250313e-16;	/* 1/2**52  */
> +static const double two10m1   = -1023.0;		/* 2**10 -1  */
> +
> +/* FP mask to extract the exponent  */
Add ".  " at the end.

> +static const union {
> +  unsigned long long mask;
> +  double d;
> +} mask = { 0x7ff0000000000000ULL };
> +
> +long double
> +__logbl (long double x)
> +{
> +  double xh, xl;
> +  double ret;
> +
> +  if (__builtin_expect (x == 0.0L, 0))
> +    return -1.0L / __builtin_fabsl (x);
> +
> +  ldbl_unpack (x, &xh, &xl);
> +  /* ret = x & 0x7ff0000000000000;  */
> +  asm (
> +    "xxland %x0,%x1,%x2\n"
> +    "fcfid  %0,%0"
> +    : "=f" (ret)
> +    : "f" (xh), "f" (mask.d));
> +  /* ret = (ret >> 52) - 1023.0;  */
> +  ret = (ret * two1div52) + two10m1;
> +  if (__builtin_expect (ret > -two10m1, 0))
> +    return (xh * xh);
> +  else if (__builtin_expect (ret == two10m1, 0))
> +    {
> +      int64_t lx, hx;
> +      int m1, m2, ma;
> +
> +      GET_LDOUBLE_WORDS64 (hx, lx, x);
> +      m1 = (hx == 0) ? 0 : __builtin_clzll (hx);
> +      m2 = (lx == 0) ? 0 : __builtin_clzll (lx);
> +      ma = (m1 == 0) ? m2 + 64 : m1;
> +      return -1022.0 + (double)(11 - ma);
> +    }
> +  /* Test to avoid logb_downward (0.0) == -0.0  */
Add ".  " at the end.

> +  return ret == -0.0 ? 0.0 : ret;
> +}
> +
> +long_double_symbol (libm, __logbl, logbl);
> diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
> b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c new file mode 100644
> index 0000000..ff3a9e0
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
> @@ -0,0 +1 @@
> +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c>
> diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c
> b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c new file mode 100644
> index 0000000..e79a28f
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c
> @@ -0,0 +1 @@
> +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logbf.c>
> diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c
> b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c new file mode 100644
> index 0000000..463e411
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c
> @@ -0,0 +1 @@
> +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c>

-- 
 Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
  SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
   GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
    GPG fingerprint = 93A3 365E CE47 B889 DF7F  FED1 389A 563C C272 A126


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]