This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch siddhesh/libm-mpa created. glibc-2.17-151-g1ec4ecf
- From: siddhesh at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 18 Jan 2013 13:47:57 -0000
- Subject: GNU C Library master sources branch siddhesh/libm-mpa created. glibc-2.17-151-g1ec4ecf
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, siddhesh/libm-mpa has been created
at 1ec4ecfb7d178e185925dccc7d579a58013dbed8 (commit)
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=1ec4ecfb7d178e185925dccc7d579a58013dbed8
commit 1ec4ecfb7d178e185925dccc7d579a58013dbed8
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date: Fri Jan 18 19:13:56 2013 +0530
Convert mantissa to long
Minimal patch to get this working
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 4040f1b..f9fe941 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -604,8 +604,8 @@ SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
int i, j, k, ip, ip2;
- double u, zk;
- double *diag;
+ int64_t zk;
+ int64_t *diag;
/* Is z=0? */
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -645,18 +645,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
while (k > p)
{
for (i = k - p, j = p; i < p + 1; i++, j--)
- zk += X[i] * Y[j];
+ zk += (int64_t) X[i] * Y[j];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k--] = zk - u;
- zk = u * RADIXI;
+ Z[k--] = zk % I_RADIX;
+ zk /= I_RADIX;
}
/* Precompute sums of diagonal elements so that we can directly use them
later. See the next comment to know we why need them. */
- diag = alloca (k * sizeof (double));
+ diag = alloca (k * sizeof (int64_t));
diag[0] = ZERO;
for (i = 1; i <= ip; i++)
{
@@ -692,15 +689,12 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
}
for (i = 1, j = k - 1; i <= lim; i++, j--)
- zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+ zk += (int64_t) (X[i] + X[j]) * (Y[i] + Y[j]);
zk -= diag[k - 1];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k--] = zk - u;
- zk = u * RADIXI;
+ Z[k--] = zk % I_RADIX;
+ zk /= I_RADIX;
}
Z[k] = zk;
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 06343d4..6f8efb3 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -61,7 +61,7 @@
typedef struct
{
int e;
- double d[40];
+ long d[40];
} mp_no;
typedef union
@@ -82,6 +82,8 @@ extern const mp_no mptwo;
#define ABS(x) ((x) < 0 ? -(x) : (x))
+#define I_RADIX (1 << 24) /* 2^24 */
+
#define RADIX 0x1.0p24 /* 2^24 */
#define RADIXI 0x1.0p-24 /* 2^-24 */
#define CUTTER 0x1.0p76 /* 2^76 */
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=41a3e8d07e56b37246f67fc1bb88e2eec839fa58
commit 41a3e8d07e56b37246f67fc1bb88e2eec839fa58
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date: Fri Jan 18 19:01:45 2013 +0530
Another tweak to the multiplication algorithm
Reduce the formula to calculate mantissa so that we reduce the net
number of multiplications performed.
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 0897b1c..4040f1b 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -43,6 +43,7 @@
#include "endian.h"
#include "mpa.h"
#include <sys/param.h>
+#include <alloca.h>
#ifndef SECTION
# define SECTION
@@ -604,6 +605,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
int i, j, k, ip, ip2;
double u, zk;
+ double *diag;
/* Is z=0? */
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -652,11 +654,47 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = u * RADIXI;
}
- /* The real deal. */
+ /* Precompute sums of diagonal elements so that we can directly use them
+ later. See the next comment to know we why need them. */
+ diag = alloca (k * sizeof (double));
+ diag[0] = ZERO;
+ for (i = 1; i <= ip; i++)
+ {
+ diag[i] = X[i] * Y[i];
+ diag[i] += diag[i - 1];
+ }
+ while (i < k)
+ diag[i++] = diag[ip];
+
+ /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
+ goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
+ number of multiplications, we halve the range and if k is an even number,
+ add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
+ X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
+
+ This reduction tells us that we're summing two things, the first term
+ through the half range and the negative of the sum of the product of all
+ terms of X and Y in the full range. i.e.
+
+ SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
+ a single loop so that it completes in O(n) time and can hence be directly
+ used in the loop below. */
while (k > 1)
{
- for (i = 1, j = k - 1; i < k; i++, j--)
- zk += X[i] * Y[j];
+ int lim = k / 2;
+
+ if (k % 2 == 0)
+ {
+ /* We want to add this only once, but since we subtract it in the sum
+ of products above, we add twice. */
+ zk += 2 * X[lim] * Y[lim];
+ lim--;
+ }
+
+ for (i = 1, j = k - 1; i <= lim; i++, j--)
+ zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+
+ zk -= diag[k - 1];
u = (zk + CUTTER) - CUTTER;
if (u > zk)
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=26d6436eb76e9042df70f9b092852b3f03f3ec22
commit 26d6436eb76e9042df70f9b092852b3f03f3ec22
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date: Wed Jan 16 17:46:37 2013 +0530
Improvement to the patch
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index a5e98ba..0897b1c 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -602,7 +602,7 @@ void
SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k, ip = p;
+ int i, j, k, ip, ip2;
double u, zk;
/* Is z=0? */
@@ -613,21 +613,33 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
}
/* We need not iterate through all X's and Y's since it's pointless to
- multiply zeroes. */
- for (i = p; i > 0; i--)
- if (X[i] == ZERO && Y[i] == ZERO)
- ip--;
- else
+ multiply zeroes. Here, both are zero... */
+ for (ip2 = p; ip2 > 0; ip2--)
+ if (X[ip2] != ZERO || Y[ip2] != ZERO)
+ break;
+
+ /* ... and here, at least one of them is still zero. */
+ for (ip = ip2; ip > 0; ip--)
+ if (X[ip] * Y[ip] != ZERO)
break;
/* Multiply, add and carry. */
k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
- while (k > 2 * ip)
+ /* For X with precision IP and Y with precision IP2, the term X[I]*Y[K-I] is
+ non-zero only when the ranges of non-zero values overlap. This happens
+ only for values of K <= IP + IP2. Note that we go from 1..K-1, which is
+ why we come down to IP + IP2 + 1 and not just IP + IP2. */
+ while (k > ip + ip2 + 1)
Z[k--] = ZERO;
zk = Z[k] = ZERO;
+ /* This gives us additional precision if required. This is only executed
+ when P < IP1 + IP2 + 1, i.e. at least one of the numbers have precision
+ of greater than or equal to half of what's required (P). Anything less
+ and we're just wasting our time since we'll be spinning around
+ multiplying zeroes. */
while (k > p)
{
for (i = k - p, j = p; i < p + 1; i++, j--)
@@ -640,6 +652,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = u * RADIXI;
}
+ /* The real deal. */
while (k > 1)
{
for (i = 1, j = k - 1; i < k; i++, j--)
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=dc591ad36ef11f270bca22dedc29ac8a7d17fb85
commit dc591ad36ef11f270bca22dedc29ac8a7d17fb85
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date: Wed Jan 16 16:50:56 2013 +0530
Fixes to the patch
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 8b4183e..a5e98ba 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -602,7 +602,7 @@ void
SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k, k2, ip = p;
+ int i, j, k, ip = p;
double u, zk;
/* Is z=0? */
@@ -615,28 +615,30 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
/* We need not iterate through all X's and Y's since it's pointless to
multiply zeroes. */
for (i = p; i > 0; i--)
- if (X[i] == 0 && Y[i] == 0)
+ if (X[i] == ZERO && Y[i] == ZERO)
ip--;
else
break;
/* Multiply, add and carry. */
- k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
- zk = Z[k2] = ZERO;
+ k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
- for (k = k2; k > p; k--)
+ while (k > 2 * ip)
+ Z[k--] = ZERO;
+
+ zk = Z[k] = ZERO;
+
+ while (k > p)
{
- for (i = k - ip, j = ip; i < ip + 1; i++, j--)
+ for (i = k - p, j = p; i < p + 1; i++, j--)
zk += X[i] * Y[j];
u = (zk + CUTTER) - CUTTER;
if (u > zk)
u -= RADIX;
- Z[k] = zk - u;
+ Z[k--] = zk - u;
zk = u * RADIXI;
}
- Z[k] = zk;
- k = MIN(k, 2 * ip);
while (k > 1)
{
@@ -646,9 +648,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
u = (zk + CUTTER) - CUTTER;
if (u > zk)
u -= RADIX;
- Z[k] = zk - u;
+ Z[k--] = zk - u;
zk = u * RADIXI;
- k--;
}
Z[k] = zk;
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=50daad0dfb0ca3fce532e50d92ee6cb714ad818d
commit 50daad0dfb0ca3fce532e50d92ee6cb714ad818d
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date: Fri Jan 11 19:19:30 2013 +0530
Optimized mp multiplication
Don't bother multiplying zeroes since that only wastes cycles. For a
most cases, it ends up wasting a lot of cycles.
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index ede8ed1..8b4183e 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -602,7 +602,7 @@ void
SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
- int i, j, k, k2;
+ int i, j, k, k2, ip = p;
double u, zk;
/* Is z=0? */
@@ -612,13 +612,21 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
return;
}
+ /* We need not iterate through all X's and Y's since it's pointless to
+ multiply zeroes. */
+ for (i = p; i > 0; i--)
+ if (X[i] == 0 && Y[i] == 0)
+ ip--;
+ else
+ break;
+
/* Multiply, add and carry. */
k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
zk = Z[k2] = ZERO;
for (k = k2; k > p; k--)
{
- for (i = k - p, j = p; i < p + 1; i++, j--)
+ for (i = k - ip, j = ip; i < ip + 1; i++, j--)
zk += X[i] * Y[j];
u = (zk + CUTTER) - CUTTER;
@@ -627,6 +635,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
Z[k] = zk - u;
zk = u * RADIXI;
}
+ Z[k] = zk;
+ k = MIN(k, 2 * ip);
while (k > 1)
{
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources