This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] Use long for mantissa for generic mp code

From: Siddhesh Poyarekar <siddhesh at redhat dot com>
To: libc-alpha at sourceware dot org
Date: Tue, 26 Feb 2013 21:12:06 +0530
Subject: [PATCH] Use long for mantissa for generic mp code
Hi,

The mantissa for mp_no does not need to be double since it only stores
integral values.  Keeping it as double and not integer puts a
performance penalty on x86 and (theoretically) on architectures that
don't have multiple FPUs.  It is however advantageous for powerpc
since it has multiple fpus that it can exploit with its special
version of __mul.

Tha attached patch applies on top of the patch that consolidates mpa.c
into the single generic form.  This patch defines new mantissa_t and
mantissa_store_t types that defaults to long and int64_t, overridden
by powerpc as double.  Operations that depend on the 'doubleness' of
the mantissa have been separated out to allow integer variants (as
defaults) and double overrides (in powerpc).

I did a build test on powerpc to verify that the generated code
remains unchanged (only the debuginfo changes).  On x86_64 I did a
build and testsuite run to verify that there were no regressions in
the testsuite resulting from this change.

Performance:

The change only makes a difference to x86_64 performance.  Like in
earlier cases, I've done multiple iterations of the slowest path of
the pow function as a benchmark for comparison.

Without patch:

Total:1336451211, Fastest:126668, Slowest:619587, Avg:133645.121100

With patch:

Total:962687766, Fastest:92650, Slowest:475676, Avg:96268.776600

That makes it a 27.97% improvement in the average case and 26.86% in
the best case.

The patch goes on top of the patches I posted earlier to sync the
generic and powerpc copies of mpa.c.

Siddhesh

	* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
	* sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T to store
	computations on mantissa.  Use macros for rounding and
	division.
	(denorm): Likewise.
	(__dbl_mp): Likewise.
	(add_magnitudes): Likewise.
	(sub_magnitudes): Likewise.
	(__mul): Likewise.
	(__sqr): Likewise.
	* sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h.  Define
	powers of two in terms of TWOPOW macro.
	(mp_no): Make type of mantissa as MANTISSA_T.
	[!RADIXI]: Define RADIXI.
	[!TWO52]: Define TWO52.
	* sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h: New file.
	* sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h: New file.


diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h
new file mode 100644
index 0000000..45a0b2d
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -0,0 +1,43 @@
+/* Overridable constants and operations.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+
+typedef long mantissa_t;
+typedef int64_t mantissa_store_t;
+
+#define TWOPOW(i) (1 << i)
+
+#define  RADIX     TWOPOW (24)		/* 2^24    */
+
+/* Divide D by RADIX and put the remainder in R.  */
+#define DIV_RADIX(d, r) \
+  ({									      \
+    r = d % RADIX;							      \
+    d /= RADIX;								      \
+  })
+
+/* Put the integer component of a double X in R and retain the fraction in
+   X.  */
+#define INTEGER_OF(x, r) \
+  ({									      \
+    r = (long) x;							      \
+    x -= r;								      \
+  })
+
+/* Truncate IN to a multiple of F, where F is a power of two.  */
+#define TRUNCATE_TO_MUL(in, f) ((in) & ~(f))
+
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 0766476..308ae07 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p)
 {
 #define R RADIXI
   long i;
-  double a, c, u, v, z[5];
+  double c;
+  mantissa_t a, u, v, z[5];
   if (p < 5)
     {
       if (p == 1)
@@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p)
 
       for (i = 2; i < 5; i++)
 	{
-	  z[i] = X[i] * a;
-	  u = (z[i] + CUTTER) - CUTTER;
-	  if (u > z[i])
-	    u -= RADIX;
-	  z[i] -= u;
-	  z[i - 1] += u * RADIXI;
+	  mantissa_t d, r;
+	  d = X[i] * a;
+	  DIV_RADIX (d, r);
+	  z[i] = r;
+	  z[i - 1] += d;
 	}
 
-      u = (z[3] + TWO71) - TWO71;
-      if (u > z[3])
-	u -= TWO19;
+      u = TRUNCATE_TO_MUL (z[3], TWO19);
       v = z[3] - u;
 
       if (v == TWO18)
@@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p)
 {
   long i, k;
   long p2 = p;
-  double c, u, z[5];
+  double c;
+  mantissa_t u, z[5];
 
 #define R RADIXI
   if (EX < -44 || (EX == -44 && X[1] < TWO5))
@@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p)
       z[3] = X[k];
     }
 
-  u = (z[3] + TWO57) - TWO57;
-  if (u > z[3])
-    u -= TWO5;
+  u = TRUNCATE_TO_MUL (z[3], TWO5);
 
   if (u == z[3])
     {
@@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p)
 {
   long i, n;
   long p2 = p;
-  double u;
 
   /* Sign.  */
   if (x == ZERO)
@@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p)
   n = MIN (p2, 4);
   for (i = 1; i <= n; i++)
     {
-      u = (x + TWO52) - TWO52;
-      if (u > x)
-	u -= ONE;
-      Y[i] = u;
-      x -= u;
+      INTEGER_OF (x, Y[i]);
       x *= RADIX;
     }
   for (; i <= p2; i++)
@@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   long i, j, k;
   long p2 = p;
-  double zk;
+  mantissa_t zk;
 
   EZ = EX;
 
@@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   long i, j, k;
   long p2 = p;
-  double zk;
+  mantissa_t zk;
 
   EZ = EX;
   i = p2;
@@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   long i, j, k, ip, ip2;
   long p2 = p;
-  double u, zk;
+  mantissa_store_t zk;
   const mp_no *a;
-  double *diag;
+  mantissa_store_t *diag;
 
   /* Is z=0?  */
   if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -680,8 +672,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 
   /* Precompute sums of diagonal elements so that we can directly use them
      later.  See the next comment to know we why need them.  */
-  diag = alloca (k * sizeof (double));
-  double d = ZERO;
+  diag = alloca (k * sizeof (mantissa_store_t));
+  mantissa_store_t d = ZERO;
   for (i = 1; i <= ip; i++)
     {
       d += X[i] * Y[i];
@@ -704,11 +696,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 
       zk -= diag[k - 1];
 
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k--] = zk - u;
-      zk = u * RADIXI;
+      DIV_RADIX (zk, Z[k]);
+      k--;
     }
 
   /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
@@ -738,11 +727,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 
       zk -= diag[k - 1];
 
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k--] = zk - u;
-      zk = u * RADIXI;
+      DIV_RADIX (zk, Z[k]);
+      k--;
     }
   Z[k] = zk;
 
@@ -774,7 +760,7 @@ SECTION
 __sqr (const mp_no *x, mp_no *y, int p)
 {
   long i, j, k, ip;
-  double u, yk;
+  mantissa_store_t yk;
 
   /* Is z=0?  */
   if (__glibc_unlikely (X[0] == ZERO))
@@ -798,7 +784,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
 
   while (k > p)
     {
-      double yk2 = 0.0;
+      mantissa_store_t yk2 = 0;
       long lim = k / 2;
 
       if (k % 2 == 0)
@@ -818,16 +804,13 @@ __sqr (const mp_no *x, mp_no *y, int p)
 
       yk += 2.0 * yk2;
 
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
+      DIV_RADIX (yk, Y[k]);
+      k--;
     }
 
   while (k > 1)
     {
-      double yk2 = 0.0;
+      mantissa_store_t yk2 = 0;
       long lim = k / 2;
 
       if (k % 2 == 0)
@@ -839,11 +822,8 @@ __sqr (const mp_no *x, mp_no *y, int p)
 
       yk += 2.0 * yk2;
 
-      u = (yk + CUTTER) - CUTTER;
-      if (u > yk)
-	u -= RADIX;
-      Y[k--] = yk - u;
-      yk = u * RADIXI;
+      DIV_RADIX (yk, Y[k]);
+      k--;
     }
   Y[k] = yk;
 
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 168b334..54044a0 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -35,6 +35,7 @@
 /* Common types and definition                                          */
 /************************************************************************/
 
+#include <mpa-arch.h>
 
 /* The mp_no structure holds the details of a multi-precision floating point
    number.
@@ -61,7 +62,7 @@
 typedef struct
 {
   int e;
-  double d[40];
+  mantissa_t d[40];
 } mp_no;
 
 typedef union
@@ -82,9 +83,13 @@ extern const mp_no mptwo;
 
 #define ABS(x)   ((x) <  0  ? -(x) : (x))
 
-#define  RADIX     0x1.0p24		/* 2^24    */
-#define  RADIXI    0x1.0p-24		/* 2^-24   */
-#define  CUTTER    0x1.0p76		/* 2^76    */
+#ifndef RADIXI
+# define  RADIXI    0x1.0p-24		/* 2^-24   */
+#endif
+
+#ifndef TWO52
+# define  TWO52     0x1.0p52		/* 2^52    */
+#endif
 
 #define  ZERO      0.0			/* 0       */
 #define  MZERO     -0.0			/* 0 with the sign bit set */
@@ -92,13 +97,13 @@ extern const mp_no mptwo;
 #define  MONE      -1.0			/* -1      */
 #define  TWO       2.0			/*  2      */
 
-#define  TWO5      0x1.0p5		/* 2^5     */
-#define  TWO8      0x1.0p8		/* 2^52    */
-#define  TWO10     0x1.0p10		/* 2^10    */
-#define  TWO18     0x1.0p18		/* 2^18    */
-#define  TWO19     0x1.0p19		/* 2^19    */
-#define  TWO23     0x1.0p23		/* 2^23    */
-#define  TWO52     0x1.0p52		/* 2^52    */
+#define  TWO5      TWOPOW (5)		/* 2^5     */
+#define  TWO8      TWOPOW (8)		/* 2^52    */
+#define  TWO10     TWOPOW (10)		/* 2^10    */
+#define  TWO18     TWOPOW (18)		/* 2^18    */
+#define  TWO19     TWOPOW (19)		/* 2^19    */
+#define  TWO23     TWOPOW (23)		/* 2^23    */
+
 #define  TWO57     0x1.0p57		/* 2^57    */
 #define  TWO71     0x1.0p71		/* 2^71    */
 #define  TWOM1032  0x1.0p-1032		/* 2^-1032 */
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h b/sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h
new file mode 100644
index 0000000..d86a1c6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa-arch.h
@@ -0,0 +1,56 @@
+/* Overridable constants and operations.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+typedef double mantissa_t;
+typedef double mantissa_store_t;
+
+#define TWOPOW(i) (0x1.0p##i)
+
+#define RADIX TWOPOW (24)		/* 2^24    */
+#define CUTTER TWOPOW (76)		/* 2^76    */
+#define RADIXI 0x1.0p-24		/* 2^-24 */
+#define TWO52 TWOPOW (52)		/* 2^52 */
+
+/* Divide D by RADIX and put the remainder in R.  */
+#define DIV_RADIX(d,r) \
+  ({									      \
+    double u = ((d) + CUTTER) - CUTTER;					      \
+    if (u > (d))							      \
+      u -= RADIX;							      \
+    r = (d) - u;							      \
+    (d) = u * RADIXI;							      \
+  })
+
+/* Put the integer component of a double X in R and retain the fraction in
+   X.  */
+#define INTEGER_OF(x, r) \
+  ({									      \
+    double u = ((x) + TWO52) - TWO52;					      \
+    if (u > (x))							      \
+      u -= ONE;								      \
+    (r) = u;								      \
+    (x) -= u;								      \
+  })
+
+/* Truncate IN to a multiple of F, where F is a power of two.  */
+#define TRUNCATE_TO_MUL(in, f) \
+  ({									      \
+    double factor = f * TWO52;						      \
+    double u = (in + factor) - factor;					      \
+    if (u > in)								      \
+      u -= f;								      \
+    u;									      \
+  })
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h b/sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h
new file mode 100644
index 0000000..d86a1c6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa-arch.h
@@ -0,0 +1,56 @@
+/* Overridable constants and operations.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+typedef double mantissa_t;
+typedef double mantissa_store_t;
+
+#define TWOPOW(i) (0x1.0p##i)
+
+#define RADIX TWOPOW (24)		/* 2^24    */
+#define CUTTER TWOPOW (76)		/* 2^76    */
+#define RADIXI 0x1.0p-24		/* 2^-24 */
+#define TWO52 TWOPOW (52)		/* 2^52 */
+
+/* Divide D by RADIX and put the remainder in R.  */
+#define DIV_RADIX(d,r) \
+  ({									      \
+    double u = ((d) + CUTTER) - CUTTER;					      \
+    if (u > (d))							      \
+      u -= RADIX;							      \
+    r = (d) - u;							      \
+    (d) = u * RADIXI;							      \
+  })
+
+/* Put the integer component of a double X in R and retain the fraction in
+   X.  */
+#define INTEGER_OF(x, r) \
+  ({									      \
+    double u = ((x) + TWO52) - TWO52;					      \
+    if (u > (x))							      \
+      u -= ONE;								      \
+    (r) = u;								      \
+    (x) -= u;								      \
+  })
+
+/* Truncate IN to a multiple of F, where F is a power of two.  */
+#define TRUNCATE_TO_MUL(in, f) \
+  ({									      \
+    double factor = f * TWO52;						      \
+    double u = (in + factor) - factor;					      \
+    if (u > in)								      \
+      u -= f;								      \
+    u;									      \
+  })
Follow-Ups:
- Re: [PATCH] Use long for mantissa for generic mp code
  - From: Richard Henderson
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]