This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Use .S sources for x86/x86_64 expl


The x86/x86_64 implementation of expl uses a long inline asm inside a
C source file.

I don't think this is generally a good practice, it may offer some
marginal convenience for accessing operands where the precise syntax
varies between PIC and non-PIC, or x86 and x86_64, but it makes the
code less flexible and harder to modify than either plain C code or a
pure .S file, and we have macros to simplify writing such things as
PIC accesses in .S files.  When you want to add additional constant
operands, or checks directly on the binary representation of the
floating-point value, as part of fixing bugs, requiring everything to
be specified as asm operands rather than just writing the desired
instruction sequences directly rather gets in the way.

Thus, I propose this patch which moves expl to using separate .S files
for x86 and x86_64, the usual approach for the bulk of the other
x86/x86_64 long double functions with assembly implementations, which
works fine for them.  Tested x86 and x86_64.

2012-05-05  Joseph Myers  <joseph@codesourcery.com>

	* sysdeps/i386/fpu/e_expl.c: Move to ...
	* sysdeps/i386/fpu/e_expl.S: ... here.  Write directly in assembly
	rather than using inline asm.
	* sysdeps/x86_64/fpu/e_expl.c: Remove file.
	* sysdeps/x86_64/fpu/e_expl.S: Copy from
	sysdeps/i386/fpu/e_expl.S, adjusted for x86_64.

diff --git a/sysdeps/i386/fpu/e_expl.S b/sysdeps/i386/fpu/e_expl.S
new file mode 100644
index 0000000..a492c29
--- /dev/null
+++ b/sysdeps/i386/fpu/e_expl.S
@@ -0,0 +1,92 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+/*
+ * The 8087 method for the exponential function is to calculate
+ *   exp(x) = 2^(x log2(e))
+ * after separating integer and fractional parts
+ *   x log2(e) = i + f, |f| <= .5
+ * 2^i is immediate but f needs to be precise for long double accuracy.
+ * Suppress range reduction error in computing f by the following.
+ * Separate x into integer and fractional parts
+ *   x = xi + xf, |xf| <= .5
+ * Separate log2(e) into the sum of an exact number c0 and small part c1.
+ *   c0 + c1 = log2(e) to extra precision
+ * Then
+ *   f = (c0 xi - i) + c0 xf + c1 x
+ * where c0 xi is exact and so also is (c0 xi - i).
+ * -- moshier@na-net.ornl.gov
+ */
+
+#include <machine/asm.h>
+
+	.section .rodata.cst16,"aM",@progbits,16
+
+	.p2align 4
+	ASM_TYPE_DIRECTIVE(c0,@object)
+c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
+	.byte 0, 0, 0, 0, 0, 0
+	ASM_SIZE_DIRECTIVE(c0)
+	ASM_TYPE_DIRECTIVE(c1,@object)
+c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
+	.byte 0, 0, 0, 0, 0, 0
+	ASM_SIZE_DIRECTIVE(c1)
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%ecx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_expl)
+	fldt	4(%esp)
+/* I added the following ugly construct because expl(+-Inf) resulted
+   in NaN.  The ugliness results from the bright minds at Intel.
+   For the i686 the code can be written better.
+   -- drepper@cygnus.com.  */
+	fxam			/* Is NaN or +-Inf?  */
+#ifdef PIC
+	LOAD_PIC_REG (cx)
+#endif
+	fstsw	%ax
+	movb	$0x45, %dh
+	andb	%ah, %dh
+	cmpb	$0x05, %dh
+	je	1f		/* Is +-Inf, jump.    */
+	fldl2e			/* 1  log2(e)         */
+	fmul	%st(1), %st	/* 1  x log2(e)       */
+	frndint			/* 1  i               */
+	fld	%st(1)		/* 2  x               */
+	frndint			/* 2  xi              */
+	fld	%st(1)		/* 3  i               */
+	fldt	MO(c0)		/* 4  c0              */
+	fld	%st(2)		/* 5  xi              */
+	fmul	%st(1), %st	/* 5  c0 xi           */
+	fsubp	%st, %st(2)	/* 4  f = c0 xi  - i  */
+	fld	%st(4)		/* 5  x               */
+	fsub	%st(3), %st	/* 5  xf = x - xi     */
+	fmulp	%st, %st(1)	/* 4  c0 xf           */
+	faddp	%st, %st(1)	/* 3  f = f + c0 xf   */
+	fldt	MO(c1)		/* 4                  */
+	fmul	%st(4), %st	/* 4  c1 * x          */
+	faddp	%st, %st(1)	/* 3  f = f + c1 * x  */
+	f2xm1			/* 3 2^(fract(x * log2(e))) - 1 */
+	fld1			/* 4 1.0              */
+	faddp			/* 3 2^(fract(x * log2(e))) */
+	fstp	%st(1)		/* 2  */
+	fscale			/* 2 scale factor is st(1); e^x */
+	fstp	%st(1)		/* 1  */
+	fstp	%st(1)		/* 0  */
+	jmp	2f
+1:	testl	$0x200, %eax	/* Test sign.  */
+	jz	2f		/* If positive, jump.  */
+	fstp	%st
+	fldz			/* Set result to 0.  */
+2:	ret
+END(__ieee754_expl)
+strong_alias (__ieee754_expl, __expl_finite)
diff --git a/sysdeps/i386/fpu/e_expl.c b/sysdeps/i386/fpu/e_expl.c
deleted file mode 100644
index 8dc9581..0000000
--- a/sysdeps/i386/fpu/e_expl.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- *
- * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
- */
-
-/*
- * The 8087 method for the exponential function is to calculate
- *   exp(x) = 2^(x log2(e))
- * after separating integer and fractional parts
- *   x log2(e) = i + f, |f| <= .5
- * 2^i is immediate but f needs to be precise for long double accuracy.
- * Suppress range reduction error in computing f by the following.
- * Separate x into integer and fractional parts
- *   x = xi + xf, |xf| <= .5
- * Separate log2(e) into the sum of an exact number c0 and small part c1.
- *   c0 + c1 = log2(e) to extra precision
- * Then
- *   f = (c0 xi - i) + c0 xf + c1 x
- * where c0 xi is exact and so also is (c0 xi - i).
- * -- moshier@na-net.ornl.gov
- */
-
-#include <math_private.h>
-
-static const long double c0 = 1.44268798828125L;
-static const long double c1 = 7.05260771340735992468e-6L;
-
-long double
-__ieee754_expl (long double x)
-{
-  long double res;
-
-/* I added the following ugly construct because expl(+-Inf) resulted
-   in NaN.  The ugliness results from the bright minds at Intel.
-   For the i686 the code can be written better.
-   -- drepper@cygnus.com.  */
-  asm ("fxam\n\t"		/* Is NaN or +-Inf?  */
-       "fstsw	%%ax\n\t"
-       "movb	$0x45, %%dh\n\t"
-       "andb	%%ah, %%dh\n\t"
-       "cmpb	$0x05, %%dh\n\t"
-       "je	1f\n\t"		/* Is +-Inf, jump.    */
-       "fldl2e\n\t"             /* 1  log2(e)         */
-       "fmul %%st(1),%%st\n\t"  /* 1  x log2(e)       */
-       "frndint\n\t"            /* 1  i               */
-       "fld %%st(1)\n\t"        /* 2  x               */
-       "frndint\n\t"            /* 2  xi              */
-       "fld %%st(1)\n\t"        /* 3  i               */
-       "fldt %2\n\t"            /* 4  c0              */
-       "fld %%st(2)\n\t"        /* 5  xi              */
-       "fmul %%st(1),%%st\n\t"  /* 5  c0 xi           */
-       "fsubp %%st,%%st(2)\n\t" /* 4  f = c0 xi  - i  */
-       "fld %%st(4)\n\t"        /* 5  x               */
-       "fsub %%st(3),%%st\n\t"  /* 5  xf = x - xi     */
-       "fmulp %%st,%%st(1)\n\t" /* 4  c0 xf           */
-       "faddp %%st,%%st(1)\n\t" /* 3  f = f + c0 xf   */
-       "fldt %3\n\t"            /* 4                  */
-       "fmul %%st(4),%%st\n\t"  /* 4  c1 * x          */
-       "faddp %%st,%%st(1)\n\t" /* 3  f = f + c1 * x  */
-       "f2xm1\n\t"		/* 3 2^(fract(x * log2(e))) - 1 */
-       "fld1\n\t"               /* 4 1.0              */
-       "faddp\n\t"		/* 3 2^(fract(x * log2(e))) */
-       "fstp	%%st(1)\n\t"    /* 2  */
-       "fscale\n\t"		/* 2 scale factor is st(1); e^x */
-       "fstp	%%st(1)\n\t"    /* 1  */
-       "fstp	%%st(1)\n\t"    /* 0  */
-       "jmp 2f\n\t"
-       "1:\ttestl	$0x200, %%eax\n\t"	/* Test sign.  */
-       "jz	2f\n\t"		/* If positive, jump.  */
-       "fstp	%%st\n\t"
-       "fldz\n\t"		/* Set result to 0.  */
-       "2:\t\n"
-       : "=t" (res) : "0" (x), "m" (c0), "m" (c1) : "ax", "dx");
-  return res;
-}
-strong_alias (__ieee754_expl, __expl_finite)
diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S
new file mode 100644
index 0000000..c2d1d40
--- /dev/null
+++ b/sysdeps/x86_64/fpu/e_expl.S
@@ -0,0 +1,89 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ */
+
+/*
+ * The 8087 method for the exponential function is to calculate
+ *   exp(x) = 2^(x log2(e))
+ * after separating integer and fractional parts
+ *   x log2(e) = i + f, |f| <= .5
+ * 2^i is immediate but f needs to be precise for long double accuracy.
+ * Suppress range reduction error in computing f by the following.
+ * Separate x into integer and fractional parts
+ *   x = xi + xf, |xf| <= .5
+ * Separate log2(e) into the sum of an exact number c0 and small part c1.
+ *   c0 + c1 = log2(e) to extra precision
+ * Then
+ *   f = (c0 xi - i) + c0 xf + c1 x
+ * where c0 xi is exact and so also is (c0 xi - i).
+ * -- moshier@na-net.ornl.gov
+ */
+
+#include <machine/asm.h>
+
+	.section .rodata.cst16,"aM",@progbits,16
+
+	.p2align 4
+	ASM_TYPE_DIRECTIVE(c0,@object)
+c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
+	.byte 0, 0, 0, 0, 0, 0
+	ASM_SIZE_DIRECTIVE(c0)
+	ASM_TYPE_DIRECTIVE(c1,@object)
+c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
+	.byte 0, 0, 0, 0, 0, 0
+	ASM_SIZE_DIRECTIVE(c1)
+
+#ifdef PIC
+# define MO(op) op##(%rip)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_expl)
+	fldt	8(%rsp)
+/* I added the following ugly construct because expl(+-Inf) resulted
+   in NaN.  The ugliness results from the bright minds at Intel.
+   For the i686 the code can be written better.
+   -- drepper@cygnus.com.  */
+	fxam			/* Is NaN or +-Inf?  */
+	fstsw	%ax
+	movb	$0x45, %dh
+	andb	%ah, %dh
+	cmpb	$0x05, %dh
+	je	1f		/* Is +-Inf, jump.    */
+	fldl2e			/* 1  log2(e)         */
+	fmul	%st(1), %st	/* 1  x log2(e)       */
+	frndint			/* 1  i               */
+	fld	%st(1)		/* 2  x               */
+	frndint			/* 2  xi              */
+	fld	%st(1)		/* 3  i               */
+	fldt	MO(c0)		/* 4  c0              */
+	fld	%st(2)		/* 5  xi              */
+	fmul	%st(1), %st	/* 5  c0 xi           */
+	fsubp	%st, %st(2)	/* 4  f = c0 xi  - i  */
+	fld	%st(4)		/* 5  x               */
+	fsub	%st(3), %st	/* 5  xf = x - xi     */
+	fmulp	%st, %st(1)	/* 4  c0 xf           */
+	faddp	%st, %st(1)	/* 3  f = f + c0 xf   */
+	fldt	MO(c1)		/* 4                  */
+	fmul	%st(4), %st	/* 4  c1 * x          */
+	faddp	%st, %st(1)	/* 3  f = f + c1 * x  */
+	f2xm1			/* 3 2^(fract(x * log2(e))) - 1 */
+	fld1			/* 4 1.0              */
+	faddp			/* 3 2^(fract(x * log2(e))) */
+	fstp	%st(1)		/* 2  */
+	fscale			/* 2 scale factor is st(1); e^x */
+	fstp	%st(1)		/* 1  */
+	fstp	%st(1)		/* 0  */
+	jmp	2f
+1:	testl	$0x200, %eax	/* Test sign.  */
+	jz	2f		/* If positive, jump.  */
+	fstp	%st
+	fldz			/* Set result to 0.  */
+2:	ret
+END(__ieee754_expl)
+strong_alias (__ieee754_expl, __expl_finite)
diff --git a/sysdeps/x86_64/fpu/e_expl.c b/sysdeps/x86_64/fpu/e_expl.c
deleted file mode 100644
index 5042e02..0000000
--- a/sysdeps/x86_64/fpu/e_expl.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "sysdeps/i386/fpu/e_expl.c"

-- 
Joseph S. Myers
joseph@codesourcery.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]