This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] Use long for mantissa for generic mp code
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: libc-alpha at sourceware dot org
- Date: Tue, 26 Feb 2013 21:12:06 +0530
- Subject: [PATCH] Use long for mantissa for generic mp code
Hi,
The mantissa for mp_no does not need to be double since it only stores
integral values. Keeping it as double and not integer puts a
performance penalty on x86 and (theoretically) on architectures that
don't have multiple FPUs. It is however advantageous for powerpc
since it has multiple fpus that it can exploit with its special
version of __mul.
Tha attached patch applies on top of the patch that consolidates mpa.c
into the single generic form. This patch defines new mantissa_t and
mantissa_store_t types that defaults to long and int64_t, overridden
by powerpc as double. Operations that depend on the 'doubleness' of
the mantissa have been separated out to allow integer variants (as
defaults) and double overrides (in powerpc).
I did a build test on powerpc to verify that the generated code
remains unchanged (only the debuginfo changes). On x86_64 I did a
build and testsuite run to verify that there were no regressions in
the testsuite resulting from this change.
Performance:
The change only makes a difference to x86_64 performance. Like in
earlier cases, I've done multiple iterations of the slowest path of
the pow function as a benchmark for comparison.
Without patch:
Total:1336451211, Fastest:126668, Slowest:619587, Avg:133645.121100
With patch:
Total:962687766, Fastest:92650, Slowest:475676, Avg:96268.776600
That makes it a 27.97% improvement in the average case and 26.86% in
the best case.
The patch goes on top of the patches I posted earlier to sync the
generic and powerpc copies of mpa.c.
Siddhesh
* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
* sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T to store
computations on mantissa. Use macros for rounding and
division.
(denorm): Likewise.
(__dbl_mp): Likewise.
(add_magnitudes): Likewise.
(sub_magnitudes): Likewise.
(__mul): Likewise.
(__sqr): Likewise.
* sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h. Define
powers of two in terms of TWOPOW macro.
(mp_no): Make type of mantissa as MANTISSA_T.
[!RADIXI]: Define RADIXI.
[!TWO52]: Define TWO52.
* sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h: New file.
* sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h: New file.
diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h
new file mode 100644
index 0000000..45a0b2d
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -0,0 +1,43 @@
+/* Overridable constants and operations.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+
+typedef long mantissa_t;
+typedef int64_t mantissa_store_t;
+
+#define TWOPOW(i) (1 << i)
+
+#define RADIX TWOPOW (24) /* 2^24 */
+
+/* Divide D by RADIX and put the remainder in R. */
+#define DIV_RADIX(d, r) \
+ ({ \
+ r = d % RADIX; \
+ d /= RADIX; \
+ })
+
+/* Put the integer component of a double X in R and retain the fraction in
+ X. */
+#define INTEGER_OF(x, r) \
+ ({ \
+ r = (long) x; \
+ x -= r; \
+ })
+
+/* Truncate IN to a multiple of F, where F is a power of two. */
+#define TRUNCATE_TO_MUL(in, f) ((in) & ~(f))
+
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 0766476..308ae07 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p)
{
#define R RADIXI
long i;
- double a, c, u, v, z[5];
+ double c;
+ mantissa_t a, u, v, z[5];
if (p < 5)
{
if (p == 1)
@@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p)
for (i = 2; i < 5; i++)
{
- z[i] = X[i] * a;
- u = (z[i] + CUTTER) - CUTTER;
- if (u > z[i])
- u -= RADIX;
- z[i] -= u;
- z[i - 1] += u * RADIXI;
+ mantissa_t d, r;
+ d = X[i] * a;
+ DIV_RADIX (d, r);
+ z[i] = r;
+ z[i - 1] += d;
}
- u = (z[3] + TWO71) - TWO71;
- if (u > z[3])
- u -= TWO19;
+ u = TRUNCATE_TO_MUL (z[3], TWO19);
v = z[3] - u;
if (v == TWO18)
@@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p)
{
long i, k;
long p2 = p;
- double c, u, z[5];
+ double c;
+ mantissa_t u, z[5];
#define R RADIXI
if (EX < -44 || (EX == -44 && X[1] < TWO5))
@@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p)
z[3] = X[k];
}
- u = (z[3] + TWO57) - TWO57;
- if (u > z[3])
- u -= TWO5;
+ u = TRUNCATE_TO_MUL (z[3], TWO5);
if (u == z[3])
{
@@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p)
{
long i, n;
long p2 = p;
- double u;
/* Sign. */
if (x == ZERO)
@@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p)
n = MIN (p2, 4);
for (i = 1; i <= n; i++)
{
- u = (x + TWO52) - TWO52;
- if (u > x)
- u -= ONE;
- Y[i] = u;
- x -= u;
+ INTEGER_OF (x, Y[i]);
x *= RADIX;
}
for (; i <= p2; i++)
@@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k;
long p2 = p;
- double zk;
+ mantissa_t zk;
EZ = EX;
@@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k;
long p2 = p;
- double zk;
+ mantissa_t zk;
EZ = EX;
i = p2;
@@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k, ip, ip2;
long p2 = p;
- double u, zk;
+ mantissa_store_t zk;
const mp_no *a;
- double *diag;
+ mantissa_store_t *diag;
/* Is z=0? */
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -680,8 +672,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
/* Precompute sums of diagonal elements so that we can directly use them
later. See the next comment to know we why need them. */
- diag = alloca (k * sizeof (double));
- double d = ZERO;
+ diag = alloca (k * sizeof (mantissa_store_t));
+ mantissa_store_t d = ZERO;
for (i = 1; i <= ip; i++)
{
d += X[i] * Y[i];
@@ -704,11 +696,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk -= diag[k - 1];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k--] = zk - u;
- zk = u * RADIXI;
+ DIV_RADIX (zk, Z[k]);
+ k--;
}
/* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
@@ -738,11 +727,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk -= diag[k - 1];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k--] = zk - u;
- zk = u * RADIXI;
+ DIV_RADIX (zk, Z[k]);
+ k--;
}
Z[k] = zk;
@@ -774,7 +760,7 @@ SECTION
__sqr (const mp_no *x, mp_no *y, int p)
{
long i, j, k, ip;
- double u, yk;
+ mantissa_store_t yk;
/* Is z=0? */
if (__glibc_unlikely (X[0] == ZERO))
@@ -798,7 +784,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
while (k > p)
{
- double yk2 = 0.0;
+ mantissa_store_t yk2 = 0;
long lim = k / 2;
if (k % 2 == 0)
@@ -818,16 +804,13 @@ __sqr (const mp_no *x, mp_no *y, int p)
yk += 2.0 * yk2;
- u = (yk + CUTTER) - CUTTER;
- if (u > yk)
- u -= RADIX;
- Y[k--] = yk - u;
- yk = u * RADIXI;
+ DIV_RADIX (yk, Y[k]);
+ k--;
}
while (k > 1)
{
- double yk2 = 0.0;
+ mantissa_store_t yk2 = 0;
long lim = k / 2;
if (k % 2 == 0)
@@ -839,11 +822,8 @@ __sqr (const mp_no *x, mp_no *y, int p)
yk += 2.0 * yk2;
- u = (yk + CUTTER) - CUTTER;
- if (u > yk)
- u -= RADIX;
- Y[k--] = yk - u;
- yk = u * RADIXI;
+ DIV_RADIX (yk, Y[k]);
+ k--;
}
Y[k] = yk;
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 168b334..54044a0 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -35,6 +35,7 @@
/* Common types and definition */
/************************************************************************/
+#include <mpa-arch.h>
/* The mp_no structure holds the details of a multi-precision floating point
number.
@@ -61,7 +62,7 @@
typedef struct
{
int e;
- double d[40];
+ mantissa_t d[40];
} mp_no;
typedef union
@@ -82,9 +83,13 @@ extern const mp_no mptwo;
#define ABS(x) ((x) < 0 ? -(x) : (x))
-#define RADIX 0x1.0p24 /* 2^24 */
-#define RADIXI 0x1.0p-24 /* 2^-24 */
-#define CUTTER 0x1.0p76 /* 2^76 */
+#ifndef RADIXI
+# define RADIXI 0x1.0p-24 /* 2^-24 */
+#endif
+
+#ifndef TWO52
+# define TWO52 0x1.0p52 /* 2^52 */
+#endif
#define ZERO 0.0 /* 0 */
#define MZERO -0.0 /* 0 with the sign bit set */
@@ -92,13 +97,13 @@ extern const mp_no mptwo;
#define MONE -1.0 /* -1 */
#define TWO 2.0 /* 2 */
-#define TWO5 0x1.0p5 /* 2^5 */
-#define TWO8 0x1.0p8 /* 2^52 */
-#define TWO10 0x1.0p10 /* 2^10 */
-#define TWO18 0x1.0p18 /* 2^18 */
-#define TWO19 0x1.0p19 /* 2^19 */
-#define TWO23 0x1.0p23 /* 2^23 */
-#define TWO52 0x1.0p52 /* 2^52 */
+#define TWO5 TWOPOW (5) /* 2^5 */
+#define TWO8 TWOPOW (8) /* 2^52 */
+#define TWO10 TWOPOW (10) /* 2^10 */
+#define TWO18 TWOPOW (18) /* 2^18 */
+#define TWO19 TWOPOW (19) /* 2^19 */
+#define TWO23 TWOPOW (23) /* 2^23 */
+
#define TWO57 0x1.0p57 /* 2^57 */
#define TWO71 0x1.0p71 /* 2^71 */
#define TWOM1032 0x1.0p-1032 /* 2^-1032 */
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h b/sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h
new file mode 100644
index 0000000..d86a1c6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h
@@ -0,0 +1,56 @@
+/* Overridable constants and operations.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+typedef double mantissa_t;
+typedef double mantissa_store_t;
+
+#define TWOPOW(i) (0x1.0p##i)
+
+#define RADIX TWOPOW (24) /* 2^24 */
+#define CUTTER TWOPOW (76) /* 2^76 */
+#define RADIXI 0x1.0p-24 /* 2^-24 */
+#define TWO52 TWOPOW (52) /* 2^52 */
+
+/* Divide D by RADIX and put the remainder in R. */
+#define DIV_RADIX(d,r) \
+ ({ \
+ double u = ((d) + CUTTER) - CUTTER; \
+ if (u > (d)) \
+ u -= RADIX; \
+ r = (d) - u; \
+ (d) = u * RADIXI; \
+ })
+
+/* Put the integer component of a double X in R and retain the fraction in
+ X. */
+#define INTEGER_OF(x, r) \
+ ({ \
+ double u = ((x) + TWO52) - TWO52; \
+ if (u > (x)) \
+ u -= ONE; \
+ (r) = u; \
+ (x) -= u; \
+ })
+
+/* Truncate IN to a multiple of F, where F is a power of two. */
+#define TRUNCATE_TO_MUL(in, f) \
+ ({ \
+ double factor = f * TWO52; \
+ double u = (in + factor) - factor; \
+ if (u > in) \
+ u -= f; \
+ u; \
+ })
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h b/sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h
new file mode 100644
index 0000000..d86a1c6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h
@@ -0,0 +1,56 @@
+/* Overridable constants and operations.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+typedef double mantissa_t;
+typedef double mantissa_store_t;
+
+#define TWOPOW(i) (0x1.0p##i)
+
+#define RADIX TWOPOW (24) /* 2^24 */
+#define CUTTER TWOPOW (76) /* 2^76 */
+#define RADIXI 0x1.0p-24 /* 2^-24 */
+#define TWO52 TWOPOW (52) /* 2^52 */
+
+/* Divide D by RADIX and put the remainder in R. */
+#define DIV_RADIX(d,r) \
+ ({ \
+ double u = ((d) + CUTTER) - CUTTER; \
+ if (u > (d)) \
+ u -= RADIX; \
+ r = (d) - u; \
+ (d) = u * RADIXI; \
+ })
+
+/* Put the integer component of a double X in R and retain the fraction in
+ X. */
+#define INTEGER_OF(x, r) \
+ ({ \
+ double u = ((x) + TWO52) - TWO52; \
+ if (u > (x)) \
+ u -= ONE; \
+ (r) = u; \
+ (x) -= u; \
+ })
+
+/* Truncate IN to a multiple of F, where F is a power of two. */
+#define TRUNCATE_TO_MUL(in, f) \
+ ({ \
+ double factor = f * TWO52; \
+ double u = (in + factor) - factor; \
+ if (u > in) \
+ u -= f; \
+ u; \
+ })