This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[ping][PATCH v2] Use long for mantissa for generic mp code
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: Richard Henderson <rth at twiddle dot net>
- Cc: libc-alpha at sourceware dot org
- Date: Wed, 13 Mar 2013 18:27:01 +0530
- Subject: [ping][PATCH v2] Use long for mantissa for generic mp code
- References: <20130226154206.GY21039@spoyarek.pnq.redhat.com><512D04C2.4020505@twiddle.net><CAAHN_R2buNZ6N0Q6K86yaTq2hz_uc-8MoNyrnTZO3DfC62q2ig@mail.gmail.com><512D7D56.2030209@twiddle.net><20130308123807.GA21984@spoyarek.pnq.redhat.com>
Ping!
On Fri, Mar 08, 2013 at 06:08:07PM +0530, Siddhesh Poyarekar wrote:
> Hi,
>
> I have updated the patch with Richards suggestions and also merged the
> duplicated mpa-arch.h. Tested on x86_64 and powerpc64 (power7).
>
> Siddhesh
>
> * sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
> * sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T to store
> computations on mantissa. Use macros for rounding and
> division.
> (denorm): Likewise.
> (__dbl_mp): Likewise.
> (add_magnitudes): Likewise.
> (sub_magnitudes): Likewise.
> (__mul): Likewise.
> (__sqr): Likewise.
> * sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h. Define
> powers of two in terms of TWOPOW macro.
> (mp_no): Make type of mantissa as MANTISSA_T.
> [!RADIXI]: Define RADIXI.
> [!TWO52]: Define TWO52.
> * sysdeps/powerpc/power4/fpu/mpa-arch.h: New file.
>
> diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h
> new file mode 100644
> index 0000000..b6e1b76
> --- /dev/null
> +++ b/sysdeps/ieee754/dbl-64/mpa-arch.h
> @@ -0,0 +1,44 @@
> +/* Overridable constants and operations.
> + Copyright (C) 2013 Free Software Foundation, Inc.
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU Lesser General Public License as published by
> + the Free Software Foundation; either version 2.1 of the License, or
> + (at your option) any later version.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public License
> + along with this program; if not, see <http://www.gnu.org/licenses/>. */
> +
> +#include <stdint.h>
> +
> +typedef long mantissa_t;
> +typedef int64_t mantissa_store_t;
> +
> +#define TWOPOW(i) (1L << i)
> +
> +#define RADIX TWOPOW (24) /* 2^24 */
> +
> +/* Divide D by RADIX and put the remainder in R. D must be a non-negative
> + integral value. */
> +#define DIV_RADIX(d, r) \
> + ({ \
> + r = ((uint64_t) d) % RADIX; \
> + d /= RADIX; \
> + })
> +
> +/* Put the integer component of a double X in R and retain the fraction in
> + X. */
> +#define INTEGER_OF(x, i) \
> + ({ \
> + i = (mantissa_t) x; \
> + x -= i; \
> + })
> +
> +/* Align IN down to F. The code assumes that F is a power of two. */
> +#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
> +
> diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
> index 0766476..860e859 100644
> --- a/sysdeps/ieee754/dbl-64/mpa.c
> +++ b/sysdeps/ieee754/dbl-64/mpa.c
> @@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p)
> {
> #define R RADIXI
> long i;
> - double a, c, u, v, z[5];
> + double c;
> + mantissa_t a, u, v, z[5];
> if (p < 5)
> {
> if (p == 1)
> @@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p)
>
> for (i = 2; i < 5; i++)
> {
> - z[i] = X[i] * a;
> - u = (z[i] + CUTTER) - CUTTER;
> - if (u > z[i])
> - u -= RADIX;
> - z[i] -= u;
> - z[i - 1] += u * RADIXI;
> + mantissa_t d, r;
> + d = X[i] * a;
> + DIV_RADIX (d, r);
> + z[i] = r;
> + z[i - 1] += d;
> }
>
> - u = (z[3] + TWO71) - TWO71;
> - if (u > z[3])
> - u -= TWO19;
> + u = ALIGN_DOWN_TO (z[3], TWO19);
> v = z[3] - u;
>
> if (v == TWO18)
> @@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p)
> {
> long i, k;
> long p2 = p;
> - double c, u, z[5];
> + double c;
> + mantissa_t u, z[5];
>
> #define R RADIXI
> if (EX < -44 || (EX == -44 && X[1] < TWO5))
> @@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p)
> z[3] = X[k];
> }
>
> - u = (z[3] + TWO57) - TWO57;
> - if (u > z[3])
> - u -= TWO5;
> + u = ALIGN_DOWN_TO (z[3], TWO5);
>
> if (u == z[3])
> {
> @@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p)
> {
> long i, n;
> long p2 = p;
> - double u;
>
> /* Sign. */
> if (x == ZERO)
> @@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p)
> n = MIN (p2, 4);
> for (i = 1; i <= n; i++)
> {
> - u = (x + TWO52) - TWO52;
> - if (u > x)
> - u -= ONE;
> - Y[i] = u;
> - x -= u;
> + INTEGER_OF (x, Y[i]);
> x *= RADIX;
> }
> for (; i <= p2; i++)
> @@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k;
> long p2 = p;
> - double zk;
> + mantissa_t zk;
>
> EZ = EX;
>
> @@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k;
> long p2 = p;
> - double zk;
> + mantissa_t zk;
>
> EZ = EX;
> i = p2;
> @@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
> {
> long i, j, k, ip, ip2;
> long p2 = p;
> - double u, zk;
> + mantissa_store_t zk;
> const mp_no *a;
> - double *diag;
> + mantissa_store_t *diag;
>
> /* Is z=0? */
> if (__glibc_unlikely (X[0] * Y[0] == ZERO))
> @@ -680,8 +672,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
>
> /* Precompute sums of diagonal elements so that we can directly use them
> later. See the next comment to know we why need them. */
> - diag = alloca (k * sizeof (double));
> - double d = ZERO;
> + diag = alloca (k * sizeof (mantissa_store_t));
> + mantissa_store_t d = ZERO;
> for (i = 1; i <= ip; i++)
> {
> d += X[i] * Y[i];
> @@ -704,11 +696,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
>
> zk -= diag[k - 1];
>
> - u = (zk + CUTTER) - CUTTER;
> - if (u > zk)
> - u -= RADIX;
> - Z[k--] = zk - u;
> - zk = u * RADIXI;
> + DIV_RADIX (zk, Z[k]);
> + k--;
> }
>
> /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
> @@ -738,11 +727,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
>
> zk -= diag[k - 1];
>
> - u = (zk + CUTTER) - CUTTER;
> - if (u > zk)
> - u -= RADIX;
> - Z[k--] = zk - u;
> - zk = u * RADIXI;
> + DIV_RADIX (zk, Z[k]);
> + k--;
> }
> Z[k] = zk;
>
> @@ -774,7 +760,7 @@ SECTION
> __sqr (const mp_no *x, mp_no *y, int p)
> {
> long i, j, k, ip;
> - double u, yk;
> + mantissa_store_t yk;
>
> /* Is z=0? */
> if (__glibc_unlikely (X[0] == ZERO))
> @@ -798,7 +784,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
>
> while (k > p)
> {
> - double yk2 = 0.0;
> + mantissa_store_t yk2 = 0;
> long lim = k / 2;
>
> if (k % 2 == 0)
> @@ -818,16 +804,13 @@ __sqr (const mp_no *x, mp_no *y, int p)
>
> yk += 2.0 * yk2;
>
> - u = (yk + CUTTER) - CUTTER;
> - if (u > yk)
> - u -= RADIX;
> - Y[k--] = yk - u;
> - yk = u * RADIXI;
> + DIV_RADIX (yk, Y[k]);
> + k--;
> }
>
> while (k > 1)
> {
> - double yk2 = 0.0;
> + mantissa_store_t yk2 = 0;
> long lim = k / 2;
>
> if (k % 2 == 0)
> @@ -839,11 +822,8 @@ __sqr (const mp_no *x, mp_no *y, int p)
>
> yk += 2.0 * yk2;
>
> - u = (yk + CUTTER) - CUTTER;
> - if (u > yk)
> - u -= RADIX;
> - Y[k--] = yk - u;
> - yk = u * RADIXI;
> + DIV_RADIX (yk, Y[k]);
> + k--;
> }
> Y[k] = yk;
>
> diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
> index 168b334..54044a0 100644
> --- a/sysdeps/ieee754/dbl-64/mpa.h
> +++ b/sysdeps/ieee754/dbl-64/mpa.h
> @@ -35,6 +35,7 @@
> /* Common types and definition */
> /************************************************************************/
>
> +#include <mpa-arch.h>
>
> /* The mp_no structure holds the details of a multi-precision floating point
> number.
> @@ -61,7 +62,7 @@
> typedef struct
> {
> int e;
> - double d[40];
> + mantissa_t d[40];
> } mp_no;
>
> typedef union
> @@ -82,9 +83,13 @@ extern const mp_no mptwo;
>
> #define ABS(x) ((x) < 0 ? -(x) : (x))
>
> -#define RADIX 0x1.0p24 /* 2^24 */
> -#define RADIXI 0x1.0p-24 /* 2^-24 */
> -#define CUTTER 0x1.0p76 /* 2^76 */
> +#ifndef RADIXI
> +# define RADIXI 0x1.0p-24 /* 2^-24 */
> +#endif
> +
> +#ifndef TWO52
> +# define TWO52 0x1.0p52 /* 2^52 */
> +#endif
>
> #define ZERO 0.0 /* 0 */
> #define MZERO -0.0 /* 0 with the sign bit set */
> @@ -92,13 +97,13 @@ extern const mp_no mptwo;
> #define MONE -1.0 /* -1 */
> #define TWO 2.0 /* 2 */
>
> -#define TWO5 0x1.0p5 /* 2^5 */
> -#define TWO8 0x1.0p8 /* 2^52 */
> -#define TWO10 0x1.0p10 /* 2^10 */
> -#define TWO18 0x1.0p18 /* 2^18 */
> -#define TWO19 0x1.0p19 /* 2^19 */
> -#define TWO23 0x1.0p23 /* 2^23 */
> -#define TWO52 0x1.0p52 /* 2^52 */
> +#define TWO5 TWOPOW (5) /* 2^5 */
> +#define TWO8 TWOPOW (8) /* 2^52 */
> +#define TWO10 TWOPOW (10) /* 2^10 */
> +#define TWO18 TWOPOW (18) /* 2^18 */
> +#define TWO19 TWOPOW (19) /* 2^19 */
> +#define TWO23 TWOPOW (23) /* 2^23 */
> +
> #define TWO57 0x1.0p57 /* 2^57 */
> #define TWO71 0x1.0p71 /* 2^71 */
> #define TWOM1032 0x1.0p-1032 /* 2^-1032 */
> diff --git a/sysdeps/powerpc/power4/fpu/mpa-arch.h b/sysdeps/powerpc/power4/fpu/mpa-arch.h
> new file mode 100644
> index 0000000..9007c9d
> --- /dev/null
> +++ b/sysdeps/powerpc/power4/fpu/mpa-arch.h
> @@ -0,0 +1,56 @@
> +/* Overridable constants and operations.
> + Copyright (C) 2013 Free Software Foundation, Inc.
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU Lesser General Public License as published by
> + the Free Software Foundation; either version 2.1 of the License, or
> + (at your option) any later version.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public License
> + along with this program; if not, see <http://www.gnu.org/licenses/>. */
> +
> +typedef double mantissa_t;
> +typedef double mantissa_store_t;
> +
> +#define TWOPOW(i) (0x1.0p##i)
> +
> +#define RADIX TWOPOW (24) /* 2^24 */
> +#define CUTTER TWOPOW (76) /* 2^76 */
> +#define RADIXI 0x1.0p-24 /* 2^-24 */
> +#define TWO52 TWOPOW (52) /* 2^52 */
> +
> +/* Divide D by RADIX and put the remainder in R. */
> +#define DIV_RADIX(d,r) \
> + ({ \
> + double u = ((d) + CUTTER) - CUTTER; \
> + if (u > (d)) \
> + u -= RADIX; \
> + r = (d) - u; \
> + (d) = u * RADIXI; \
> + })
> +
> +/* Put the integer component of a double X in R and retain the fraction in
> + X. */
> +#define INTEGER_OF(x, r) \
> + ({ \
> + double u = ((x) + TWO52) - TWO52; \
> + if (u > (x)) \
> + u -= ONE; \
> + (r) = u; \
> + (x) -= u; \
> + })
> +
> +/* Align IN down to a multiple of F, where F is a power of two. */
> +#define ALIGN_DOWN_TO(in, f) \
> + ({ \
> + double factor = f * TWO52; \
> + double u = (in + factor) - factor; \
> + if (u > in) \
> + u -= f; \
> + u; \
> + })