This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 4/8] powerpc: Convert __ieee754_sqrt{,f} from macros to inlines.


On Wed, 2012-03-07 at 14:10 -0800, Richard Henderson wrote:
> 	* sysdeps/powerpc/fpu/math_private.h (__ieee754_sqrt): Convert
> 	from macro to inline function; merge with the
> 	!__LIBC_INTERNAL_MATH_INLINES version.
> 	(__ieee754_sqrtf): Likewise.
> 
> Cc: Ryan S. Arnold <rsa@us.ibm.com>
> ---
>  sysdeps/powerpc/fpu/math_private.h |  106 ++++++++++++------------------------
>  1 files changed, 36 insertions(+), 70 deletions(-)
> 
> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
> index 7bacecb..6174bf8 100644
> --- a/sysdeps/powerpc/fpu/math_private.h
> +++ b/sysdeps/powerpc/fpu/math_private.h
> @@ -23,35 +23,49 @@
>  #include <sysdep.h>
>  #include <ldsodefs.h>
>  #include <dl-procinfo.h>
> -
>  #include <math/math_private.h>
> 
>  # if __WORDSIZE == 64 || defined _ARCH_PWR4
>  #  define __CPU_HAS_FSQRT 1
> -
> -#ifndef __ieee754_sqrt
> -# define __ieee754_sqrt(x)		\
> -  ({ double __z;			\
> -     __asm __volatile (			\
> -	"	fsqrt %0,%1\n"		\
> -		: "=f" (__z)		\
> -		: "f"(x));		\
> -     __z; })
> -#endif
> -#ifndef __ieee754_sqrtf
> -# define __ieee754_sqrtf(x)		\
> -  ({ float __z;				\
> -     __asm __volatile (			\
> -	"	fsqrts %0,%1\n"		\
> -		: "=f" (__z)		\
> -		: "f"(x));		\
> -     __z; })
> -#endif
> -
>  # else
>  #  define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
> -# endif	// __WORDSIZE == 64 || defined _ARCH_PWR4
> +# endif
> +
> +extern double __slow_ieee754_sqrt (double);
> +extern inline double
> +__ieee754_sqrt (double __x)
> +{
> +  double __z;
> 
> +  if (__CPU_HAS_FSQRT)
> +    {
> +      /* Volatile is required to prevent the compiler from moving the
> +         fsqrt instruction above the branch.  */
> +      __asm __volatile ("fsqrt	%0,%1" : "=f" (__z) : "f" (__x));
> +    }
> +  else
> +     __z = __slow_ieee754_sqrt(__x);
> +
> +  return __z;
> +}
> +
> +extern float __slow_ieee754_sqrtf (float);
> +extern inline float
> +__ieee754_sqrtf (float __x)
> +{
> +  float __z;
> +
> +  if (__CPU_HAS_FSQRT)
> +    {
> +      /* Volatile is required to prevent the compiler from moving the
> +         fsqrts instruction above the branch.  */
> +      __asm __volatile ("fsqrts	%0,%1" : "=f" (__z) : "f" (__x));
> +    }
> +  else
> +     __z = __slow_ieee754_sqrtf(__x);
> +
> +  return __z;
> +}
> 
>  #if defined _ARCH_PWR5X
> 
> @@ -162,52 +176,4 @@
> 
>  #endif /* defined _ARCH_PWR6 */
> 
> -
> -# ifndef __LIBC_INTERNAL_MATH_INLINES
> -extern double __slow_ieee754_sqrt (double);
> -__inline double
> -__ieee754_sqrt (double __x)
> -{
> -  double __z;
> -
> -  /* If the CPU is 64-bit we can use the optional FP instructions.  */
> -  if (__CPU_HAS_FSQRT)
> -  {
> -    /* Volatile is required to prevent the compiler from moving the
> -       fsqrt instruction above the branch.  */
> -     __asm __volatile (
> -	"	fsqrt	%0,%1\n"
> -		: "=f" (__z)
> -		: "f" (__x));
> -  }
> -  else
> -     __z = __slow_ieee754_sqrt(__x);
> -
> -  return __z;
> -}
> -
> -extern float __slow_ieee754_sqrtf (float);
> -
> -__inline float
> -__ieee754_sqrtf (float __x)
> -{
> -  float __z;
> -
> -  /* If the CPU is 64-bit we can use the optional FP instructions.  */
> -  if (__CPU_HAS_FSQRT)
> -  {
> -    /* Volatile is required to prevent the compiler from moving the
> -       fsqrts instruction above the branch.  */
> -     __asm __volatile (
> -	"	fsqrts	%0,%1\n"
> -		: "=f" (__z)
> -		: "f" (__x));
> -  }
> -  else
> -     __z = __slow_ieee754_sqrtf(__x);
> -
> -  return __z;
> -}
> -#endif /* __LIBC_INTERNAL_MATH_INLINES */
> -
>  #endif /* _PPC_MATH_PRIVATE_H_ */

Hi Richard,

I'm fine with these changes.  Adhemerval looked at the generated code for
these on Power7, and the fsqrt was emitted without a branch test, so we're
satisfied.

Thanks, 

Ryan


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]