This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] PowerPC: Trigonometric optimizations for POWER
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: libc-alpha at sourceware dot org
- Date: Mon, 07 Nov 2011 18:27:26 -0200
- Subject: Re: [PATCH] PowerPC: Trigonometric optimizations for POWER
- References: <4E36F78A.50403@linux.vnet.ibm.com> <1312308652.7676.186.camel@localhost.localdomain> <4E41727D.6030700@linux.vnet.ibm.com> <m3ty8f467t.fsf@hase.home> <4EA70837.3090809@linux.vnet.ibm.com> <m3aa8odua1.fsf@hase.home>
On 10/26/2011 06:36 AM, Andreas Schwab wrote:
> Adhemerval Zanella <azanella@linux.vnet.ibm.com> writes:
>
>> diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
>> index 90021c6..881e5eb 100644
>> --- a/sysdeps/powerpc/fpu/math_private.h
>> +++ b/sysdeps/powerpc/fpu/math_private.h
>> @@ -27,9 +27,90 @@
>>
>> # if __WORDSIZE == 64 || defined _ARCH_PWR4
>> # define __CPU_HAS_FSQRT 1
>> +
>> +static inline double
>> +__ieee754_sqrt_inline (double __x)
> Just define __ieee754_sqrt, see sysdeps/x86_64/fpu/math_private.h.
>
> Andreas.
>
Here it is:
PowerPC - Inline assembly version for FP functions
This patch adds inline assembly macros that use native PowerPC
floating-point instructions when the platform supports them, and fall
back to the internal glibc functions when the instructions are not
implemented (for instance, on POWER4).
--
2011-11-07 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/fpu/math_private.h: Use inline assembly versions
of the math functions ceil, trunc, floor, round, sqrt, and copysign
when available on the platform.
* sysdeps/powerpc/fpu/e_sqrt.c: Undefine __ieee754_sqrt to avoid a
name clash with the inline macro from math_private.h.
* sysdeps/powerpc/fpu/e_sqrtf.c: Likewise.
* sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Likewise.
* sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c
index f9ded25..d59bd08 100644
--- a/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/fpu/e_sqrt.c
@@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x)
return f_wash (x);
}
+#undef __ieee754_sqrt
double
__ieee754_sqrt (double x)
{
diff --git a/sysdeps/powerpc/fpu/e_sqrtf.c b/sysdeps/powerpc/fpu/e_sqrtf.c
index 965faee..9c6b860 100644
--- a/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/fpu/e_sqrtf.c
@@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x)
return f_washf (x);
}
-
+#undef __ieee754_sqrtf
float
__ieee754_sqrtf (float x)
{
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 90021c6..c4dd217 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -1,5 +1,5 @@
/* Private inline math functions for powerpc.
- Copyright (C) 2006
+ Copyright (C) 2006, 2011
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -25,12 +25,145 @@
#include <ldsodefs.h>
#include <dl-procinfo.h>
+#include <math/math_private.h>
+
# if __WORDSIZE == 64 || defined _ARCH_PWR4
# define __CPU_HAS_FSQRT 1
+
+#ifndef __ieee754_sqrt
+# define __ieee754_sqrt(x) \
+ ({ double __z; \
+ __asm __volatile ( \
+ " fsqrt %0,%1\n" \
+ : "=f" (__z) \
+ : "f"(x)); \
+ __z; })
+#endif
+#ifndef __ieee754_sqrtf
+# define __ieee754_sqrtf(x) \
+ ({ float __z; \
+ __asm __volatile ( \
+ " fsqrts %0,%1\n" \
+ : "=f" (__z) \
+ : "f"(x)); \
+ __z; })
+#endif
+
# else
# define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
+# endif // __WORDSIZE == 64 || defined _ARCH_PWR4
+
+
+#if defined _ARCH_PWR5X
+
+# ifndef __round
+# define __round(x) \
+ ({ double __z; \
+ __asm __volatile ( \
+ " frin %0,%1\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+# ifndef __roundf
+# define __roundf(x) \
+ ({ float __z; \
+ __asm __volatile ( \
+ " frin %0,%1\n" \
+ " frsp %0,%0\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+
+# ifndef __trunc
+# define __trunc(x) \
+ ({ double __z; \
+ __asm __volatile ( \
+ " friz %0,%1\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+# ifndef __truncf
+# define __truncf(x) \
+ ({ float __z; \
+ __asm __volatile ( \
+ " friz %0,%1\n" \
+ " frsp %0,%0\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+
+# ifndef __ceil
+# define __ceil(x) \
+ ({ double __z; \
+ __asm __volatile ( \
+ " frip %0,%1\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+# ifndef __ceilf
+# define __ceilf(x) \
+ ({ float __z; \
+ __asm __volatile ( \
+ " frip %0,%1\n" \
+ " frsp %0,%0\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
# endif
+# ifndef __floor
+# define __floor(x) \
+ ({ double __z; \
+ __asm __volatile ( \
+ " frim %0,%1\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+# ifndef __floorf
+# define __floorf(x) \
+ ({ float __z; \
+ __asm __volatile ( \
+ " frim %0,%1\n" \
+ " frsp %0,%0\n" \
+ : "=f" (__z) \
+ : "f" (x)); \
+ __z; })
+# endif
+
+#endif /* defined _ARCH_PWR5X */
+
+
+#if defined _ARCH_PWR6
+
+# ifndef __copysign
+# define __copysign(x, y) \
+ ({ double __z; \
+ __asm __volatile ( \
+ " fcpsgn %0,%1,%2\n" \
+ : "=f" (__z) \
+ : "f" (y), "f" (x)); \
+ __z; })
+# endif
+# ifndef __copysignf
+# define __copysignf(x, y) \
+ ({ float __z; \
+ __asm __volatile ( \
+ " fcpsgn %0,%1,%2\n" \
+ " frsp %0,%0\n" \
+ : "=f" (__z) \
+ : "f" (y), "f" (x)); \
+ __z; })
+# endif
+
+#endif /* defined _ARCH_PWR6 */
+
+
# ifndef __LIBC_INTERNAL_MATH_INLINES
extern double __slow_ieee754_sqrt (double);
__inline double
@@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x)
}
#endif /* __LIBC_INTERNAL_MATH_INLINES */
-#include <math/math_private.h>
-
#endif /* _PPC_MATH_PRIVATE_H_ */
diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
index 314abba..66d04ce 100644
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
@@ -20,6 +20,7 @@
#include <math.h>
#include <math_private.h>
+#undef __ieee754_sqrt
double
__ieee754_sqrt (double x)
{
diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
index 7157214..847a2e4 100644
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
@@ -20,6 +20,7 @@
#include <math.h>
#include <math_private.h>
+#undef __ieee754_sqrtf
float
__ieee754_sqrtf (float x)
{
--
1.7.1