This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch siddhesh/libm-mpa created. glibc-2.17-151-g1ec4ecf


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, siddhesh/libm-mpa has been created
        at  1ec4ecfb7d178e185925dccc7d579a58013dbed8 (commit)

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=1ec4ecfb7d178e185925dccc7d579a58013dbed8

commit 1ec4ecfb7d178e185925dccc7d579a58013dbed8
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Fri Jan 18 19:13:56 2013 +0530

    Convert mantissa to long
    
    Minimal patch to get this working

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 4040f1b..f9fe941 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -604,8 +604,8 @@ SECTION
 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int i, j, k, ip, ip2;
-  double u, zk;
-  double *diag;
+  int64_t zk;
+  int64_t *diag;
 
   /* Is z=0?  */
   if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -645,18 +645,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   while (k > p)
     {
       for (i = k - p, j = p; i < p + 1; i++, j--)
-	zk += X[i] * Y[j];
+	zk += (int64_t) X[i] * Y[j];
 
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k--] = zk - u;
-      zk = u * RADIXI;
+      Z[k--] = zk % I_RADIX;
+      zk /= I_RADIX;
     }
 
   /* Precompute sums of diagonal elements so that we can directly use them
      later.  See the next comment to know we why need them.  */
-  diag = alloca (k * sizeof (double));
+  diag = alloca (k * sizeof (int64_t));
   diag[0] = ZERO;
   for (i = 1; i <= ip; i++)
     {
@@ -692,15 +689,12 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 	}
 
       for (i = 1, j = k - 1; i <= lim; i++, j--)
-	zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+	zk += (int64_t) (X[i] + X[j]) * (Y[i] + Y[j]);
 
       zk -= diag[k - 1];
 
-      u = (zk + CUTTER) - CUTTER;
-      if (u > zk)
-	u -= RADIX;
-      Z[k--] = zk - u;
-      zk = u * RADIXI;
+      Z[k--] = zk % I_RADIX;
+      zk /= I_RADIX;
     }
   Z[k] = zk;
 
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 06343d4..6f8efb3 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -61,7 +61,7 @@
 typedef struct
 {
   int e;
-  double d[40];
+  long d[40];
 } mp_no;
 
 typedef union
@@ -82,6 +82,8 @@ extern const mp_no mptwo;
 
 #define ABS(x)   ((x) <  0  ? -(x) : (x))
 
+#define I_RADIX    (1 << 24)		/* 2^24 */
+
 #define  RADIX     0x1.0p24		/* 2^24    */
 #define  RADIXI    0x1.0p-24		/* 2^-24   */
 #define  CUTTER    0x1.0p76		/* 2^76    */

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=41a3e8d07e56b37246f67fc1bb88e2eec839fa58

commit 41a3e8d07e56b37246f67fc1bb88e2eec839fa58
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Fri Jan 18 19:01:45 2013 +0530

    Another tweak to the multiplication algorithm
    
    Reduce the formula to calculate mantissa so that we reduce the net
    number of multiplications performed.

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 0897b1c..4040f1b 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -43,6 +43,7 @@
 #include "endian.h"
 #include "mpa.h"
 #include <sys/param.h>
+#include <alloca.h>
 
 #ifndef SECTION
 # define SECTION
@@ -604,6 +605,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int i, j, k, ip, ip2;
   double u, zk;
+  double *diag;
 
   /* Is z=0?  */
   if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -652,11 +654,47 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
       zk = u * RADIXI;
     }
 
-  /* The real deal.  */
+  /* Precompute sums of diagonal elements so that we can directly use them
+     later.  See the next comment to know we why need them.  */
+  diag = alloca (k * sizeof (double));
+  diag[0] = ZERO;
+  for (i = 1; i <= ip; i++)
+    {
+      diag[i] = X[i] * Y[i];
+      diag[i] += diag[i - 1];
+    }
+  while (i < k)
+    diag[i++] = diag[ip];
+
+  /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
+     goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
+     number of multiplications, we halve the range and if k is an even number,
+     add the diagonal element X[k/2]Y[k/2].  Through the half range, we compute
+     X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
+
+     This reduction tells us that we're summing two things, the first term
+     through the half range and the negative of the sum of the product of all
+     terms of X and Y in the full range.  i.e.
+
+     SUM(X[i] * Y[i]) for k terms.  This is precalculated above for each k in
+     a single loop so that it completes in O(n) time and can hence be directly
+     used in the loop below.  */
   while (k > 1)
     {
-      for (i = 1, j = k - 1; i < k; i++, j--)
-	zk += X[i] * Y[j];
+      int lim = k / 2;
+
+      if (k % 2 == 0)
+        {
+	  /* We want to add this only once, but since we subtract it in the sum
+	     of products above, we add twice.  */
+          zk += 2 * X[lim] * Y[lim];
+	  lim--;
+	}
+
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
+	zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+
+      zk -= diag[k - 1];
 
       u = (zk + CUTTER) - CUTTER;
       if (u > zk)

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=26d6436eb76e9042df70f9b092852b3f03f3ec22

commit 26d6436eb76e9042df70f9b092852b3f03f3ec22
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Wed Jan 16 17:46:37 2013 +0530

    Improvement to the patch

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index a5e98ba..0897b1c 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -602,7 +602,7 @@ void
 SECTION
 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
-  int i, j, k, ip = p;
+  int i, j, k, ip, ip2;
   double u, zk;
 
   /* Is z=0?  */
@@ -613,21 +613,33 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
     }
 
   /* We need not iterate through all X's and Y's since it's pointless to
-     multiply zeroes.  */
-  for (i = p; i > 0; i--)
-    if (X[i] == ZERO && Y[i] == ZERO)
-      ip--;
-    else
+     multiply zeroes.  Here, both are zero...  */
+  for (ip2 = p; ip2 > 0; ip2--)
+    if (X[ip2] != ZERO || Y[ip2] != ZERO)
+      break;
+
+  /* ... and here, at least one of them is still zero.  */
+  for (ip = ip2; ip > 0; ip--)
+    if (X[ip] * Y[ip] != ZERO)
       break;
 
   /* Multiply, add and carry.  */
   k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
 
-  while (k > 2 * ip)
+  /* For X with precision IP and Y with precision IP2, the term X[I]*Y[K-I] is
+     non-zero only when the ranges of non-zero values overlap.  This happens
+     only for values of K <= IP + IP2.  Note that we go from 1..K-1, which is
+     why we come down to IP + IP2 + 1 and not just IP + IP2.  */
+  while (k > ip + ip2 + 1)
     Z[k--] = ZERO;
 
   zk = Z[k] = ZERO;
 
+  /* This gives us additional precision if required.  This is only executed
+     when P < IP1 + IP2 + 1, i.e. at least one of the numbers have precision
+     of greater than or equal to half of what's required (P).  Anything less
+     and we're just wasting our time since we'll be spinning around
+     multiplying zeroes.  */
   while (k > p)
     {
       for (i = k - p, j = p; i < p + 1; i++, j--)
@@ -640,6 +652,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
       zk = u * RADIXI;
     }
 
+  /* The real deal.  */
   while (k > 1)
     {
       for (i = 1, j = k - 1; i < k; i++, j--)

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=dc591ad36ef11f270bca22dedc29ac8a7d17fb85

commit dc591ad36ef11f270bca22dedc29ac8a7d17fb85
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Wed Jan 16 16:50:56 2013 +0530

    Fixes to the patch

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 8b4183e..a5e98ba 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -602,7 +602,7 @@ void
 SECTION
 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
-  int i, j, k, k2, ip = p;
+  int i, j, k, ip = p;
   double u, zk;
 
   /* Is z=0?  */
@@ -615,28 +615,30 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   /* We need not iterate through all X's and Y's since it's pointless to
      multiply zeroes.  */
   for (i = p; i > 0; i--)
-    if (X[i] == 0 && Y[i] == 0)
+    if (X[i] == ZERO && Y[i] == ZERO)
       ip--;
     else
       break;
 
   /* Multiply, add and carry.  */
-  k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
-  zk = Z[k2] = ZERO;
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
 
-  for (k = k2; k > p; k--)
+  while (k > 2 * ip)
+    Z[k--] = ZERO;
+
+  zk = Z[k] = ZERO;
+
+  while (k > p)
     {
-      for (i = k - ip, j = ip; i < ip + 1; i++, j--)
+      for (i = k - p, j = p; i < p + 1; i++, j--)
 	zk += X[i] * Y[j];
 
       u = (zk + CUTTER) - CUTTER;
       if (u > zk)
 	u -= RADIX;
-      Z[k] = zk - u;
+      Z[k--] = zk - u;
       zk = u * RADIXI;
     }
-  Z[k] = zk;
-  k = MIN(k, 2 * ip);
 
   while (k > 1)
     {
@@ -646,9 +648,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
       u = (zk + CUTTER) - CUTTER;
       if (u > zk)
 	u -= RADIX;
-      Z[k] = zk - u;
+      Z[k--] = zk - u;
       zk = u * RADIXI;
-      k--;
     }
   Z[k] = zk;
 

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=50daad0dfb0ca3fce532e50d92ee6cb714ad818d

commit 50daad0dfb0ca3fce532e50d92ee6cb714ad818d
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Fri Jan 11 19:19:30 2013 +0530

    Optimized mp multiplication
    
    Don't bother multiplying zeroes since that only wastes cycles.  For a
    most cases, it ends up wasting a lot of cycles.

diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index ede8ed1..8b4183e 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -602,7 +602,7 @@ void
 SECTION
 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
-  int i, j, k, k2;
+  int i, j, k, k2, ip = p;
   double u, zk;
 
   /* Is z=0?  */
@@ -612,13 +612,21 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
       return;
     }
 
+  /* We need not iterate through all X's and Y's since it's pointless to
+     multiply zeroes.  */
+  for (i = p; i > 0; i--)
+    if (X[i] == 0 && Y[i] == 0)
+      ip--;
+    else
+      break;
+
   /* Multiply, add and carry.  */
   k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
   zk = Z[k2] = ZERO;
 
   for (k = k2; k > p; k--)
     {
-      for (i = k - p, j = p; i < p + 1; i++, j--)
+      for (i = k - ip, j = ip; i < ip + 1; i++, j--)
 	zk += X[i] * Y[j];
 
       u = (zk + CUTTER) - CUTTER;
@@ -627,6 +635,8 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
       Z[k] = zk - u;
       zk = u * RADIXI;
     }
+  Z[k] = zk;
+  k = MIN(k, 2 * ip);
 
   while (k > 1)
     {

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]