ce426f
Partial backports of:
ce426f
=====================
ce426f
ce426f
commit c5d5d574cbfa96d0f6c1db24d1e072c472627e41
ce426f
Author: Ondřej Bílka <neleai@seznam.cz>
ce426f
Date:   Thu Oct 17 16:03:24 2013 +0200
ce426f
ce426f
    Format floating routines.
ce426f
ce426f
commit da08f647d58d674db08cdb3e61c8826c89470e2e
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Fri Dec 21 09:15:10 2012 +0530
ce426f
ce426f
    Move more constants into static variables
ce426f
    
ce426f
    Code cleanup.
ce426f
ce426f
commit f93a8d15699ee699282465dc1e03e033f3fabb52
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Wed Jan 16 16:06:48 2013 +0530
ce426f
ce426f
    Consolidate constant defines into mpa.h
ce426f
ce426f
commit caa99d06e7f1403887294442af520b0f8c6f3de0
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Fri Jan 18 11:18:13 2013 +0530
ce426f
ce426f
    Simplify calculation of 2^-m in __mpexp
ce426f
ce426f
commit 107a5bf085f5c4ef8c28266a34d476724cfc3475
ce426f
Author: Joseph Myers <joseph@codesourcery.com>
ce426f
Date:   Tue Nov 18 15:40:56 2014 +0000
ce426f
ce426f
    Fix libm mpone, mptwo namespace (bug 17616).
ce426f
ce426f
To provided __mptwo for __inv.
ce426f
ce426f
Full backports of the following:
ce426f
================================
ce426f
ce426f
commit 44e0d4c20ce5bf3825897e5d4b7caae94016214d
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Wed Jan 2 11:44:13 2013 +0530
ce426f
ce426f
    Split mantissa calculation loop and add branch prediction
ce426f
ce426f
commit f8af25d218202ff2f5d167b8e44e4b79f91d147f
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Fri Jan 4 15:09:33 2013 +0530
ce426f
ce426f
    Remove commented declarations
ce426f
ce426f
commit a9e48ab40e230c7fe34e4892bec8af4f3f975a20
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Fri Jan 4 15:42:09 2013 +0530
ce426f
ce426f
    Clean up comment for MP_NO
ce426f
ce426f
commit fffb407f4668b40b3df1eb8ee3ae3bc64ee79e20
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Fri Jan 4 22:52:12 2013 +0530
ce426f
ce426f
    Remove unused __cr and __cpymn
ce426f
ce426f
commit 950c99ca9094e7dc6394e90395f51e12093393aa
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Wed Jan 9 19:07:15 2013 +0530
ce426f
ce426f
    Update comments in mpa.c
ce426f
    
ce426f
    Fixed comment style and clearer wording in some cases.
ce426f
ce426f
commit 1066a53440d2744566e97c59bcd0d422186b3e90
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Mon Jan 14 21:31:25 2013 +0530
ce426f
ce426f
    Fix code formatting in mpa.c
ce426f
    
ce426f
    This includes the overridden mpa.c in power4.
ce426f
ce426f
commit 2a91b5735ac1bc65ce5c2a3646d75ba7208e26e9
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Mon Jan 14 21:36:58 2013 +0530
ce426f
ce426f
    Minor tweak to mp multiplication
ce426f
    
ce426f
    Add a local variable to remove extra copies to/from memory in the Z
ce426f
    array.
ce426f
ce426f
ommit 45f058844c33f670475bd02f266942746bcb332b
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Tue Feb 26 21:28:16 2013 +0530
ce426f
ce426f
    Another tweak to the multiplication algorithm
ce426f
    
ce426f
    Reduce the formula to calculate mantissa so that we reduce the net
ce426f
    number of multiplications performed.
ce426f
ce426f
commit bab8a695ee79a5a6e9b2b699938952b006fcbbec
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Thu Feb 21 14:29:18 2013 +0530
ce426f
ce426f
    Fix whitespace differences between generic and powerpc mpa.c
ce426f
ce426f
ce426f
commit 2d0e0f29f85036d1189231cb7c1f19f27ba14a89
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Fri Feb 15 23:56:20 2013 +0530
ce426f
ce426f
    Fix determination of lower precision in __mul
ce426f
ce426f
commit 909279a5cfa938c989c9b01c8f48a0276291ec45
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Wed Feb 13 14:16:23 2013 +0530
ce426f
ce426f
    Optimized mp multiplication
ce426f
    
ce426f
    Don't bother multiplying zeroes since that only wastes cycles.
ce426f
ce426f
commit bdf028142eb77d6ae59500db875068fa5d7b059d
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Wed Feb 13 13:55:29 2013 +0530
ce426f
ce426f
    Clean up add_magnitudes and sub_magnitudes
ce426f
ce426f
commit d6752ccd696c71d23cd3df8fb9cc60b61c32e65a
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Thu Feb 14 10:31:09 2013 +0530
ce426f
ce426f
    New __sqr function as a faster special case of __mul
ce426f
ce426f
commit 4709fe7602b56e9f6ee1ab6afb4067409a784f29
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Sat Feb 16 00:09:29 2013 +0530
ce426f
ce426f
    Use intermediate variable to compute exponent in __mul
ce426f
ce426f
commit 20cd7fb3ae63795ae7c9a464abf5ed19b364ade0
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Wed Feb 20 18:56:20 2013 +0530
ce426f
ce426f
    Copy comment about inner loop from powerpc mpa.c to the default one
ce426f
ce426f
commit e69804d14e43f14c3c65dc570afdbfb822c9838b
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Mon Feb 25 16:43:02 2013 +0530
ce426f
ce426f
    Use long wherever possible in mpa.c
ce426f
    
ce426f
    Using long throughout like powerpc does is beneficial since it reduces
ce426f
    the need to switch to 32-bit instructions.  It gives a very minor
ce426f
    performance improvement.
ce426f
ce426f
commit 82a9811d29c00161c7c8ea7f70e2cc30988e192e
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Thu Mar 7 12:23:29 2013 +0530
ce426f
ce426f
    Use generic mpa.c code for everything except __mul and __sqr
ce426f
ce426f
commit 6f2e90e78f151bab153c2b38492505ae2012db06
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Tue Mar 26 19:28:50 2013 +0530
ce426f
ce426f
    Make mantissa type of mp_no configurable
ce426f
ce426f
    The mantissa of mp_no is intended to take only integral values.  This
ce426f
    is a relatively good choice for powerpc due to its 4 fpus, but not for
ce426f
    other architectures, which suffer due to this choice.  This change
ce426f
    makes the default mantissa a long integer and allows powerpc to
ce426f
    override it.  Additionally, some operations have been optimized for
ce426f
    integer manipulation, resulting in a significant improvement in
ce426f
    performance.
ce426f
ce426f
commit 5739f705eed5cf58e7b439e5983542e06d7fc2da
ce426f
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
ce426f
Date:   Tue Mar 26 20:24:04 2013 +0530
ce426f
ce426f
    Use integral constants
ce426f
    
ce426f
    The compiler is smart enough to convert those into double for powerpc,
ce426f
    but if we put them as doubles, it adds overhead by performing those
ce426f
    operations in floating point mode.
ce426f
ce426f
commit 89f3b6e18c6e7833438789746fcfc2e7189f7cac
ce426f
Author: Joseph Myers <joseph@codesourcery.com>
ce426f
Date:   Thu May 21 23:05:45 2015 +0000
ce426f
ce426f
    Fix sysdeps/ieee754/dbl-64/mpa.c for -Wuninitialized.
ce426f
    
ce426f
    If you remove the "override CFLAGS += -Wno-uninitialized" in
ce426f
    math/Makefile, one of the errors you get is:
ce426f
    
ce426f
    ../sysdeps/ieee754/dbl-64/mpa.c: In function '__mp_dbl.part.0':
ce426f
    ../sysdeps/ieee754/dbl-64/mpa.c:183:5: error: 'c' may be used uninitialized in this function [-Werror=maybe-uninitialized]
ce426f
       c *= X[0];
ce426f
    
ce426f
    The problem is that the p < 5 case initializes c if p is 1, 2, 3 or 4
ce426f
    but not otherwise, and in fact p is positive for all calls to this
ce426f
    function so the uninitialized case can't actually occur.  This patch
ce426f
    replaces the "if (p == 4)" last case with a comment so the compiler
ce426f
    can see that all paths do initialize c.
ce426f
ce426f
    Tested for x86_64.
ce426f
ce426f
            * sysdeps/ieee754/dbl-64/mpa.c (norm): Remove if condition on
ce426f
            (p == 4) case.
ce426f
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c
ce426f
@@ -1,7 +1,7 @@
ce426f
 /*
ce426f
  * IBM Accurate Mathematical Library
ce426f
  * written by International Business Machines Corp.
ce426f
- * Copyright (C) 2001, 2011 Free Software Foundation
ce426f
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc.
ce426f
  *
ce426f
  * This program is free software; you can redistribute it and/or modify
ce426f
  * it under the terms of the GNU Lesser General Public License as published by
ce426f
@@ -22,9 +22,7 @@
ce426f
 /*  FUNCTIONS:                                                          */
ce426f
 /*               mcr                                                    */
ce426f
 /*               acr                                                    */
ce426f
-/*               cr                                                     */
ce426f
 /*               cpy                                                    */
ce426f
-/*               cpymn                                                  */
ce426f
 /*               norm                                                   */
ce426f
 /*               denorm                                                 */
ce426f
 /*               mp_dbl                                                 */
ce426f
@@ -44,479 +42,868 @@
ce426f
 
ce426f
 #include "endian.h"
ce426f
 #include "mpa.h"
ce426f
-#include "mpa2.h"
ce426f
-#include <sys/param.h>	/* For MIN() */
ce426f
+#include <sys/param.h>
ce426f
+#include <alloca.h>
ce426f
 
ce426f
 #ifndef SECTION
ce426f
 # define SECTION
ce426f
 #endif
ce426f
 
ce426f
+#ifndef NO__CONST
ce426f
+/* TODO: With only a partial backport of the constant cleanup from
ce426f
+	 upstream we limit the definition of these two constants to
ce426f
+	 this file.  */
ce426f
+static const mp_no __mpone = { 1, { 1.0, 1.0 } };
ce426f
+static const mp_no __mptwo = { 1, { 1.0, 2.0 } };
ce426f
+#endif
ce426f
+
ce426f
 #ifndef NO___ACR
ce426f
-/* mcr() compares the sizes of the mantissas of two multiple precision  */
ce426f
-/* numbers. Mantissas are compared regardless of the signs of the       */
ce426f
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also     */
ce426f
-/* disregarded.                                                         */
ce426f
+/* Compare mantissa of two multiple precision numbers regardless of the sign
ce426f
+   and exponent of the numbers.  */
ce426f
 static int
ce426f
-mcr(const mp_no *x, const mp_no *y, int p) {
ce426f
-  int i;
ce426f
-  for (i=1; i<=p; i++) {
ce426f
-    if      (X[i] == Y[i])  continue;
ce426f
-    else if (X[i] >  Y[i])  return  1;
ce426f
-    else                    return -1; }
ce426f
+mcr (const mp_no *x, const mp_no *y, int p)
ce426f
+{
ce426f
+  long i;
ce426f
+  long p2 = p;
ce426f
+  for (i = 1; i <= p2; i++)
ce426f
+    {
ce426f
+      if (X[i] == Y[i])
ce426f
+	continue;
ce426f
+      else if (X[i] > Y[i])
ce426f
+	return 1;
ce426f
+      else
ce426f
+	return -1;
ce426f
+    }
ce426f
   return 0;
ce426f
 }
ce426f
 
ce426f
-
ce426f
-/* acr() compares the absolute values of two multiple precision numbers */
ce426f
+/* Compare the absolute values of two multiple precision numbers.  */
ce426f
 int
ce426f
-__acr(const mp_no *x, const mp_no *y, int p) {
ce426f
-  int i;
ce426f
-
ce426f
-  if      (X[0] == ZERO) {
ce426f
-    if    (Y[0] == ZERO) i= 0;
ce426f
-    else                 i=-1;
ce426f
-  }
ce426f
-  else if (Y[0] == ZERO) i= 1;
ce426f
-  else {
ce426f
-    if      (EX >  EY)   i= 1;
ce426f
-    else if (EX <  EY)   i=-1;
ce426f
-    else                 i= mcr(x,y,p);
ce426f
-  }
ce426f
-
ce426f
-  return i;
ce426f
-}
ce426f
-#endif
ce426f
-
ce426f
+__acr (const mp_no *x, const mp_no *y, int p)
ce426f
+{
ce426f
+  long i;
ce426f
 
ce426f
-#if 0
ce426f
-/* cr() compares the values of two multiple precision numbers           */
ce426f
-static int  __cr(const mp_no *x, const mp_no *y, int p) {
ce426f
-  int i;
ce426f
-
ce426f
-  if      (X[0] > Y[0])  i= 1;
ce426f
-  else if (X[0] < Y[0])  i=-1;
ce426f
-  else if (X[0] < ZERO ) i= __acr(y,x,p);
ce426f
-  else                   i= __acr(x,y,p);
ce426f
+  if (X[0] == 0)
ce426f
+    {
ce426f
+      if (Y[0] == 0)
ce426f
+	i = 0;
ce426f
+      else
ce426f
+	i = -1;
ce426f
+    }
ce426f
+  else if (Y[0] == 0)
ce426f
+    i = 1;
ce426f
+  else
ce426f
+    {
ce426f
+      if (EX > EY)
ce426f
+	i = 1;
ce426f
+      else if (EX < EY)
ce426f
+	i = -1;
ce426f
+      else
ce426f
+	i = mcr (x, y, p);
ce426f
+    }
ce426f
 
ce426f
   return i;
ce426f
 }
ce426f
 #endif
ce426f
 
ce426f
-
ce426f
 #ifndef NO___CPY
ce426f
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible.      */
ce426f
-void __cpy(const mp_no *x, mp_no *y, int p) {
ce426f
+/* Copy multiple precision number X into Y.  They could be the same
ce426f
+   number.  */
ce426f
+void
ce426f
+__cpy (const mp_no *x, mp_no *y, int p)
ce426f
+{
ce426f
+  long i;
ce426f
+
ce426f
   EY = EX;
ce426f
-  for (int i=0; i <= p; i++)    Y[i] = X[i];
ce426f
+  for (i = 0; i <= p; i++)
ce426f
+    Y[i] = X[i];
ce426f
 }
ce426f
 #endif
ce426f
 
ce426f
+#ifndef NO___MP_DBL
ce426f
+/* Convert a multiple precision number *X into a double precision
ce426f
+   number *Y, normalized case (|x| >= 2**(-1022))).  X has precision
ce426f
+   P, which is positive.  */
ce426f
+static void
ce426f
+norm (const mp_no *x, double *y, int p)
ce426f
+{
ce426f
+# define R RADIXI
ce426f
+  long i;
ce426f
+  double c;
ce426f
+  mantissa_t a, u, v, z[5];
ce426f
+  if (p < 5)
ce426f
+    {
ce426f
+      if (p == 1)
ce426f
+	c = X[1];
ce426f
+      else if (p == 2)
ce426f
+	c = X[1] + R * X[2];
ce426f
+      else if (p == 3)
ce426f
+	c = X[1] + R * (X[2] + R * X[3]);
ce426f
+      else /* p == 4.  */
ce426f
+	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
ce426f
+    }
ce426f
+  else
ce426f
+    {
ce426f
+      for (a = 1, z[1] = X[1]; z[1] < TWO23; )
ce426f
+	{
ce426f
+	  a *= 2;
ce426f
+	  z[1] *= 2;
ce426f
+	}
ce426f
 
ce426f
-#if 0
ce426f
-/* Copy a multiple precision number x of precision m into a */
ce426f
-/* multiple precision number y of precision n. In case n>m, */
ce426f
-/* the digits of y beyond the m'th are set to zero. In case */
ce426f
-/* n
ce426f
-/* x=y is permissible.                                      */
ce426f
-
ce426f
-static void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
ce426f
-
ce426f
-  int i,k;
ce426f
-
ce426f
-  EY = EX;     k=MIN(m,n);
ce426f
-  for (i=0; i <= k; i++)    Y[i] = X[i];
ce426f
-  for (   ; i <= n; i++)    Y[i] = ZERO;
ce426f
-}
ce426f
-#endif
ce426f
+      for (i = 2; i < 5; i++)
ce426f
+	{
ce426f
+	  mantissa_store_t d, r;
ce426f
+	  d = X[i] * (mantissa_store_t) a;
ce426f
+	  DIV_RADIX (d, r);
ce426f
+	  z[i] = r;
ce426f
+	  z[i - 1] += d;
ce426f
+	}
ce426f
 
ce426f
+      u = ALIGN_DOWN_TO (z[3], TWO19);
ce426f
+      v = z[3] - u;
ce426f
 
ce426f
-#ifndef NO___MP_DBL
ce426f
-/* Convert a multiple precision number *x into a double precision */
ce426f
-/* number *y, normalized case  (|x| >= 2**(-1022))) */
ce426f
-static void norm(const mp_no *x, double *y, int p)
ce426f
-{
ce426f
-  #define R  radixi.d
ce426f
-  int i;
ce426f
-#if 0
ce426f
-  int k;
ce426f
-#endif
ce426f
-  double a,c,u,v,z[5];
ce426f
-  if (p<5) {
ce426f
-    if      (p==1) c = X[1];
ce426f
-    else if (p==2) c = X[1] + R* X[2];
ce426f
-    else if (p==3) c = X[1] + R*(X[2]  +   R* X[3]);
ce426f
-    else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
ce426f
-  }
ce426f
-  else {
ce426f
-    for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
ce426f
-	{a *= TWO;   z[1] *= TWO; }
ce426f
-
ce426f
-    for (i=2; i<5; i++) {
ce426f
-      z[i] = X[i]*a;
ce426f
-      u = (z[i] + CUTTER)-CUTTER;
ce426f
-      if  (u > z[i])  u -= RADIX;
ce426f
-      z[i] -= u;
ce426f
-      z[i-1] += u*RADIXI;
ce426f
-    }
ce426f
-
ce426f
-    u = (z[3] + TWO71) - TWO71;
ce426f
-    if (u > z[3])   u -= TWO19;
ce426f
-    v = z[3]-u;
ce426f
-
ce426f
-    if (v == TWO18) {
ce426f
-      if (z[4] == ZERO) {
ce426f
-	for (i=5; i <= p; i++) {
ce426f
-	  if (X[i] == ZERO)   continue;
ce426f
-	  else                {z[3] += ONE;   break; }
ce426f
+      if (v == TWO18)
ce426f
+	{
ce426f
+	  if (z[4] == 0)
ce426f
+	    {
ce426f
+	      for (i = 5; i <= p; i++)
ce426f
+		{
ce426f
+		  if (X[i] == 0)
ce426f
+		    continue;
ce426f
+		  else
ce426f
+		    {
ce426f
+		      z[3] += 1;
ce426f
+		      break;
ce426f
+		    }
ce426f
+		}
ce426f
+	    }
ce426f
+	  else
ce426f
+	    z[3] += 1;
ce426f
 	}
ce426f
-      }
ce426f
-      else              z[3] += ONE;
ce426f
-    }
ce426f
 
ce426f
-    c = (z[1] + R *(z[2] + R * z[3]))/a;
ce426f
-  }
ce426f
+      c = (z[1] + R * (z[2] + R * z[3])) / a;
ce426f
+    }
ce426f
 
ce426f
   c *= X[0];
ce426f
 
ce426f
-  for (i=1; i
ce426f
-  for (i=1; i>EX; i--)   c *= RADIXI;
ce426f
+  for (i = 1; i < EX; i++)
ce426f
+    c *= RADIX;
ce426f
+  for (i = 1; i > EX; i--)
ce426f
+    c *= RADIXI;
ce426f
 
ce426f
   *y = c;
ce426f
-#undef R
ce426f
+# undef R
ce426f
 }
ce426f
 
ce426f
-/* Convert a multiple precision number *x into a double precision */
ce426f
-/* number *y, denormalized case  (|x| < 2**(-1022))) */
ce426f
-static void denorm(const mp_no *x, double *y, int p)
ce426f
+/* Convert a multiple precision number *X into a double precision
ce426f
+   number *Y, Denormal case  (|x| < 2**(-1022))).  */
ce426f
+static void
ce426f
+denorm (const mp_no *x, double *y, int p)
ce426f
 {
ce426f
-  int i,k;
ce426f
-  double c,u,z[5];
ce426f
-#if 0
ce426f
-  double a,v;
ce426f
-#endif
ce426f
+  long i, k;
ce426f
+  long p2 = p;
ce426f
+  double c;
ce426f
+  mantissa_t u, z[5];
ce426f
+
ce426f
+# define R RADIXI
ce426f
+  if (EX < -44 || (EX == -44 && X[1] < TWO5))
ce426f
+    {
ce426f
+      *y = 0;
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-#define R  radixi.d
ce426f
-  if (EX<-44 || (EX==-44 && X[1]
ce426f
-     { *y=ZERO; return; }
ce426f
-
ce426f
-  if      (p==1) {
ce426f
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=ZERO;  z[3]=ZERO;  k=3;}
ce426f
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=ZERO;  k=2;}
ce426f
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
ce426f
-  }
ce426f
-  else if (p==2) {
ce426f
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  z[3]=ZERO;  k=3;}
ce426f
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=X[2];  k=2;}
ce426f
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
ce426f
-  }
ce426f
-  else {
ce426f
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  k=3;}
ce426f
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  k=2;}
ce426f
-    else              {z[1]=     TWO10;  z[2]=ZERO;  k=1;}
ce426f
-    z[3] = X[k];
ce426f
-  }
ce426f
-
ce426f
-  u = (z[3] + TWO57) - TWO57;
ce426f
-  if  (u > z[3])   u -= TWO5;
ce426f
-
ce426f
-  if (u==z[3]) {
ce426f
-    for (i=k+1; i <= p; i++) {
ce426f
-      if (X[i] == ZERO)   continue;
ce426f
-      else {z[3] += ONE;   break; }
ce426f
-    }
ce426f
-  }
ce426f
-
ce426f
-  c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
ce426f
-
ce426f
-  *y = c*TWOM1032;
ce426f
-#undef R
ce426f
-}
ce426f
-
ce426f
-/* Convert a multiple precision number *x into a double precision number *y. */
ce426f
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */
ce426f
-
ce426f
-void __mp_dbl(const mp_no *x, double *y, int p) {
ce426f
-#if 0
ce426f
-  int i,k;
ce426f
-  double a,c,u,v,z[5];
ce426f
-#endif
ce426f
+  if (p2 == 1)
ce426f
+    {
ce426f
+      if (EX == -42)
ce426f
+	{
ce426f
+	  z[1] = X[1] + TWO10;
ce426f
+	  z[2] = 0;
ce426f
+	  z[3] = 0;
ce426f
+	  k = 3;
ce426f
+	}
ce426f
+      else if (EX == -43)
ce426f
+	{
ce426f
+	  z[1] = TWO10;
ce426f
+	  z[2] = X[1];
ce426f
+	  z[3] = 0;
ce426f
+	  k = 2;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  z[1] = TWO10;
ce426f
+	  z[2] = 0;
ce426f
+	  z[3] = X[1];
ce426f
+	  k = 1;
ce426f
+	}
ce426f
+    }
ce426f
+  else if (p2 == 2)
ce426f
+    {
ce426f
+      if (EX == -42)
ce426f
+	{
ce426f
+	  z[1] = X[1] + TWO10;
ce426f
+	  z[2] = X[2];
ce426f
+	  z[3] = 0;
ce426f
+	  k = 3;
ce426f
+	}
ce426f
+      else if (EX == -43)
ce426f
+	{
ce426f
+	  z[1] = TWO10;
ce426f
+	  z[2] = X[1];
ce426f
+	  z[3] = X[2];
ce426f
+	  k = 2;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  z[1] = TWO10;
ce426f
+	  z[2] = 0;
ce426f
+	  z[3] = X[1];
ce426f
+	  k = 1;
ce426f
+	}
ce426f
+    }
ce426f
+  else
ce426f
+    {
ce426f
+      if (EX == -42)
ce426f
+	{
ce426f
+	  z[1] = X[1] + TWO10;
ce426f
+	  z[2] = X[2];
ce426f
+	  k = 3;
ce426f
+	}
ce426f
+      else if (EX == -43)
ce426f
+	{
ce426f
+	  z[1] = TWO10;
ce426f
+	  z[2] = X[1];
ce426f
+	  k = 2;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  z[1] = TWO10;
ce426f
+	  z[2] = 0;
ce426f
+	  k = 1;
ce426f
+	}
ce426f
+      z[3] = X[k];
ce426f
+    }
ce426f
 
ce426f
-  if (X[0] == ZERO)  {*y = ZERO;  return; }
ce426f
+  u = ALIGN_DOWN_TO (z[3], TWO5);
ce426f
 
ce426f
-  if      (EX> -42)                 norm(x,y,p);
ce426f
-  else if (EX==-42 && X[1]>=TWO10)  norm(x,y,p);
ce426f
-  else                              denorm(x,y,p);
ce426f
+  if (u == z[3])
ce426f
+    {
ce426f
+      for (i = k + 1; i <= p2; i++)
ce426f
+	{
ce426f
+	  if (X[i] == 0)
ce426f
+	    continue;
ce426f
+	  else
ce426f
+	    {
ce426f
+	      z[3] += 1;
ce426f
+	      break;
ce426f
+	    }
ce426f
+	}
ce426f
+    }
ce426f
+
ce426f
+  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
ce426f
+
ce426f
+  *y = c * TWOM1032;
ce426f
+# undef R
ce426f
 }
ce426f
-#endif
ce426f
 
ce426f
+/* Convert multiple precision number *X into double precision number *Y.  The
ce426f
+   result is correctly rounded to the nearest/even.  */
ce426f
+void
ce426f
+__mp_dbl (const mp_no *x, double *y, int p)
ce426f
+{
ce426f
+  if (X[0] == 0)
ce426f
+    {
ce426f
+      *y = 0;
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-/* dbl_mp() converts a double precision number x into a multiple precision  */
ce426f
-/* number *y. If the precision p is too small the result is truncated. x is */
ce426f
-/* left unchanged.                                                          */
ce426f
+  if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
ce426f
+    norm (x, y, p);
ce426f
+  else
ce426f
+    denorm (x, y, p);
ce426f
+}
ce426f
+#endif
ce426f
 
ce426f
+/* Get the multiple precision equivalent of X into *Y.  If the precision is too
ce426f
+   small, the result is truncated.  */
ce426f
 void
ce426f
 SECTION
ce426f
-__dbl_mp(double x, mp_no *y, int p) {
ce426f
+__dbl_mp (double x, mp_no *y, int p)
ce426f
+{
ce426f
+  long i, n;
ce426f
+  long p2 = p;
ce426f
 
ce426f
-  int i,n;
ce426f
-  double u;
ce426f
+  /* Sign.  */
ce426f
+  if (x == 0)
ce426f
+    {
ce426f
+      Y[0] = 0;
ce426f
+      return;
ce426f
+    }
ce426f
+  else if (x > 0)
ce426f
+    Y[0] = 1;
ce426f
+  else
ce426f
+    {
ce426f
+      Y[0] = -1;
ce426f
+      x = -x;
ce426f
+    }
ce426f
 
ce426f
-  /* Sign */
ce426f
-  if      (x == ZERO)  {Y[0] = ZERO;  return; }
ce426f
-  else if (x >  ZERO)   Y[0] = ONE;
ce426f
-  else                 {Y[0] = MONE;  x=-x;   }
ce426f
-
ce426f
-  /* Exponent */
ce426f
-  for (EY=ONE; x >= RADIX; EY += ONE)   x *= RADIXI;
ce426f
-  for (      ; x <  ONE;   EY -= ONE)   x *= RADIX;
ce426f
-
ce426f
-  /* Digits */
ce426f
-  n=MIN(p,4);
ce426f
-  for (i=1; i<=n; i++) {
ce426f
-    u = (x + TWO52) - TWO52;
ce426f
-    if (u>x)   u -= ONE;
ce426f
-    Y[i] = u;     x -= u;    x *= RADIX; }
ce426f
-  for (   ; i<=p; i++)     Y[i] = ZERO;
ce426f
+  /* Exponent.  */
ce426f
+  for (EY = 1; x >= RADIX; EY += 1)
ce426f
+    x *= RADIXI;
ce426f
+  for (; x < 1; EY -= 1)
ce426f
+    x *= RADIX;
ce426f
+
ce426f
+  /* Digits.  */
ce426f
+  n = MIN (p2, 4);
ce426f
+  for (i = 1; i <= n; i++)
ce426f
+    {
ce426f
+      INTEGER_OF (x, Y[i]);
ce426f
+      x *= RADIX;
ce426f
+    }
ce426f
+  for (; i <= p2; i++)
ce426f
+    Y[i] = 0;
ce426f
 }
ce426f
 
ce426f
-
ce426f
-/*  add_magnitudes() adds the magnitudes of *x & *y assuming that           */
ce426f
-/*  abs(*x) >= abs(*y) > 0.                                                 */
ce426f
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
ce426f
-/* No guard digit is used. The result equals the exact sum, truncated.      */
ce426f
-/* *x & *y are left unchanged.                                              */
ce426f
-
ce426f
+/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
ce426f
+   sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
ce426f
+   Y and Z.  No guard digit is used.  The result equals the exact sum,
ce426f
+   truncated.  */
ce426f
 static void
ce426f
 SECTION
ce426f
-add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
-  int i,j,k;
ce426f
+add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
+{
ce426f
+  long i, j, k;
ce426f
+  long p2 = p;
ce426f
+  mantissa_t zk;
ce426f
 
ce426f
   EZ = EX;
ce426f
 
ce426f
-  i=p;    j=p+ EY - EX;    k=p+1;
ce426f
+  i = p2;
ce426f
+  j = p2 + EY - EX;
ce426f
+  k = p2 + 1;
ce426f
+
ce426f
+  if (__glibc_unlikely (j < 1))
ce426f
+    {
ce426f
+      __cpy (x, z, p);
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-  if (j<1)
ce426f
-     {__cpy(x,z,p);  return; }
ce426f
-  else   Z[k] = ZERO;
ce426f
-
ce426f
-  for (; j>0; i--,j--) {
ce426f
-    Z[k] += X[i] + Y[j];
ce426f
-    if (Z[k] >= RADIX) {
ce426f
-      Z[k]  -= RADIX;
ce426f
-      Z[--k] = ONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  for (; i>0; i--) {
ce426f
-    Z[k] += X[i];
ce426f
-    if (Z[k] >= RADIX) {
ce426f
-      Z[k]  -= RADIX;
ce426f
-      Z[--k] = ONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  if (Z[1] == ZERO) {
ce426f
-    for (i=1; i<=p; i++)    Z[i] = Z[i+1]; }
ce426f
-  else   EZ += ONE;
ce426f
-}
ce426f
+  zk = 0;
ce426f
 
ce426f
+  for (; j > 0; i--, j--)
ce426f
+    {
ce426f
+      zk += X[i] + Y[j];
ce426f
+      if (zk >= RADIX)
ce426f
+	{
ce426f
+	  Z[k--] = zk - RADIX;
ce426f
+	  zk = 1;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  Z[k--] = zk;
ce426f
+	  zk = 0;
ce426f
+	}
ce426f
+    }
ce426f
 
ce426f
-/*  sub_magnitudes() subtracts the magnitudes of *x & *y assuming that      */
ce426f
-/*  abs(*x) > abs(*y) > 0.                                                  */
ce426f
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z  */
ce426f
-/* or y&z. One guard digit is used. The error is less than one ulp.         */
ce426f
-/* *x & *y are left unchanged.                                              */
ce426f
+  for (; i > 0; i--)
ce426f
+    {
ce426f
+      zk += X[i];
ce426f
+      if (zk >= RADIX)
ce426f
+	{
ce426f
+	  Z[k--] = zk - RADIX;
ce426f
+	  zk = 1;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  Z[k--] = zk;
ce426f
+	  zk = 0;
ce426f
+	}
ce426f
+    }
ce426f
 
ce426f
+  if (zk == 0)
ce426f
+    {
ce426f
+      for (i = 1; i <= p2; i++)
ce426f
+	Z[i] = Z[i + 1];
ce426f
+    }
ce426f
+  else
ce426f
+    {
ce426f
+      Z[1] = zk;
ce426f
+      EZ += 1;
ce426f
+    }
ce426f
+}
ce426f
+
ce426f
+/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
ce426f
+   The sign of the difference *Z is not changed.  X and Y may overlap but not X
ce426f
+   and Z or Y and Z.  One guard digit is used.  The error is less than one
ce426f
+   ULP.  */
ce426f
 static void
ce426f
 SECTION
ce426f
-sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
-  int i,j,k;
ce426f
+sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
+{
ce426f
+  long i, j, k;
ce426f
+  long p2 = p;
ce426f
+  mantissa_t zk;
ce426f
 
ce426f
   EZ = EX;
ce426f
+  i = p2;
ce426f
+  j = p2 + EY - EX;
ce426f
+  k = p2;
ce426f
+
ce426f
+  /* Y is too small compared to X, copy X over to the result.  */
ce426f
+  if (__glibc_unlikely (j < 1))
ce426f
+    {
ce426f
+      __cpy (x, z, p);
ce426f
+      return;
ce426f
+    }
ce426f
+
ce426f
+  /* The relevant least significant digit in Y is non-zero, so we factor it in
ce426f
+     to enhance accuracy.  */
ce426f
+  if (j < p2 && Y[j + 1] > 0)
ce426f
+    {
ce426f
+      Z[k + 1] = RADIX - Y[j + 1];
ce426f
+      zk = -1;
ce426f
+    }
ce426f
+  else
ce426f
+    zk = Z[k + 1] = 0;
ce426f
 
ce426f
-  if (EX == EY) {
ce426f
-    i=j=k=p;
ce426f
-    Z[k] = Z[k+1] = ZERO; }
ce426f
-  else {
ce426f
-    j= EX - EY;
ce426f
-    if (j > p)  {__cpy(x,z,p);  return; }
ce426f
-    else {
ce426f
-      i=p;   j=p+1-j;   k=p;
ce426f
-      if (Y[j] > ZERO) {
ce426f
-	Z[k+1] = RADIX - Y[j--];
ce426f
-	Z[k]   = MONE; }
ce426f
-      else {
ce426f
-	Z[k+1] = ZERO;
ce426f
-	Z[k]   = ZERO;   j--;}
ce426f
-    }
ce426f
-  }
ce426f
-
ce426f
-  for (; j>0; i--,j--) {
ce426f
-    Z[k] += (X[i] - Y[j]);
ce426f
-    if (Z[k] < ZERO) {
ce426f
-      Z[k]  += RADIX;
ce426f
-      Z[--k] = MONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  for (; i>0; i--) {
ce426f
-    Z[k] += X[i];
ce426f
-    if (Z[k] < ZERO) {
ce426f
-      Z[k]  += RADIX;
ce426f
-      Z[--k] = MONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
+  /* Subtract and borrow.  */
ce426f
+  for (; j > 0; i--, j--)
ce426f
+    {
ce426f
+      zk += (X[i] - Y[j]);
ce426f
+      if (zk < 0)
ce426f
+	{
ce426f
+	  Z[k--] = zk + RADIX;
ce426f
+	  zk = -1;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  Z[k--] = zk;
ce426f
+	  zk = 0;
ce426f
+	}
ce426f
+    }
ce426f
 
ce426f
-  for (i=1; Z[i] == ZERO; i++) ;
ce426f
+  /* We're done with digits from Y, so it's just digits in X.  */
ce426f
+  for (; i > 0; i--)
ce426f
+    {
ce426f
+      zk += X[i];
ce426f
+      if (zk < 0)
ce426f
+	{
ce426f
+	  Z[k--] = zk + RADIX;
ce426f
+	  zk = -1;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  Z[k--] = zk;
ce426f
+	  zk = 0;
ce426f
+	}
ce426f
+    }
ce426f
+
ce426f
+  /* Normalize.  */
ce426f
+  for (i = 1; Z[i] == 0; i++)
ce426f
+    ;
ce426f
   EZ = EZ - i + 1;
ce426f
-  for (k=1; i <= p+1; )
ce426f
+  for (k = 1; i <= p2 + 1; )
ce426f
     Z[k++] = Z[i++];
ce426f
-  for (; k <= p; )
ce426f
-    Z[k++] = ZERO;
ce426f
+  for (; k <= p2; )
ce426f
+    Z[k++] = 0;
ce426f
 }
ce426f
 
ce426f
-
ce426f
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap  */
ce426f
-/* but not x&z or y&z. One guard digit is used. The error is less than    */
ce426f
-/* one ulp. *x & *y are left unchanged.                                   */
ce426f
-
ce426f
+/* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
ce426f
+   and Z or Y and Z.  One guard digit is used.  The error is less than one
ce426f
+   ULP.  */
ce426f
 void
ce426f
 SECTION
ce426f
-__add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
+__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
+{
ce426f
   int n;
ce426f
 
ce426f
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  return; }
ce426f
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);  return; }
ce426f
+  if (X[0] == 0)
ce426f
+    {
ce426f
+      __cpy (y, z, p);
ce426f
+      return;
ce426f
+    }
ce426f
+  else if (Y[0] == 0)
ce426f
+    {
ce426f
+      __cpy (x, z, p);
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-  if (X[0] == Y[0])   {
ce426f
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] = X[0]; }
ce426f
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
ce426f
-  }
ce426f
-  else                       {
ce426f
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] = X[0]; }
ce426f
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
ce426f
-    else                      Z[0] = ZERO;
ce426f
-  }
ce426f
+  if (X[0] == Y[0])
ce426f
+    {
ce426f
+      if (__acr (x, y, p) > 0)
ce426f
+	{
ce426f
+	  add_magnitudes (x, y, z, p);
ce426f
+	  Z[0] = X[0];
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  add_magnitudes (y, x, z, p);
ce426f
+	  Z[0] = Y[0];
ce426f
+	}
ce426f
+    }
ce426f
+  else
ce426f
+    {
ce426f
+      if ((n = __acr (x, y, p)) == 1)
ce426f
+	{
ce426f
+	  sub_magnitudes (x, y, z, p);
ce426f
+	  Z[0] = X[0];
ce426f
+	}
ce426f
+      else if (n == -1)
ce426f
+	{
ce426f
+	  sub_magnitudes (y, x, z, p);
ce426f
+	  Z[0] = Y[0];
ce426f
+	}
ce426f
+      else
ce426f
+	Z[0] = 0;
ce426f
+    }
ce426f
 }
ce426f
 
ce426f
+/* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
ce426f
+   not X and Z or Y and Z.  One guard digit is used.  The error is less than
ce426f
+   one ULP.  */
ce426f
+void
ce426f
+SECTION
ce426f
+__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
+{
ce426f
+  int n;
ce426f
+
ce426f
+  if (X[0] == 0)
ce426f
+    {
ce426f
+      __cpy (y, z, p);
ce426f
+      Z[0] = -Z[0];
ce426f
+      return;
ce426f
+    }
ce426f
+  else if (Y[0] == 0)
ce426f
+    {
ce426f
+      __cpy (x, z, p);
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
ce426f
-/* overlap but not x&z or y&z. One guard digit is used. The error is      */
ce426f
-/* less than one ulp. *x & *y are left unchanged.                         */
ce426f
+  if (X[0] != Y[0])
ce426f
+    {
ce426f
+      if (__acr (x, y, p) > 0)
ce426f
+	{
ce426f
+	  add_magnitudes (x, y, z, p);
ce426f
+	  Z[0] = X[0];
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  add_magnitudes (y, x, z, p);
ce426f
+	  Z[0] = -Y[0];
ce426f
+	}
ce426f
+    }
ce426f
+  else
ce426f
+    {
ce426f
+      if ((n = __acr (x, y, p)) == 1)
ce426f
+	{
ce426f
+	  sub_magnitudes (x, y, z, p);
ce426f
+	  Z[0] = X[0];
ce426f
+	}
ce426f
+      else if (n == -1)
ce426f
+	{
ce426f
+	  sub_magnitudes (y, x, z, p);
ce426f
+	  Z[0] = -Y[0];
ce426f
+	}
ce426f
+      else
ce426f
+	Z[0] = 0;
ce426f
+    }
ce426f
+}
ce426f
 
ce426f
+#ifndef NO__MUL
ce426f
+/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
ce426f
+   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
ce426f
+   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
ce426f
 void
ce426f
 SECTION
ce426f
-__sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
+{
ce426f
+  long i, j, k, ip, ip2;
ce426f
+  long p2 = p;
ce426f
+  mantissa_store_t zk;
ce426f
+  const mp_no *a;
ce426f
+  mantissa_store_t *diag;
ce426f
+
ce426f
+  /* Is z=0?  */
ce426f
+  if (__glibc_unlikely (X[0] * Y[0] == 0))
ce426f
+    {
ce426f
+      Z[0] = 0;
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-  int n;
ce426f
+  /* We need not iterate through all X's and Y's since it's pointless to
ce426f
+     multiply zeroes.  Here, both are zero...  */
ce426f
+  for (ip2 = p2; ip2 > 0; ip2--)
ce426f
+    if (X[ip2] != 0 || Y[ip2] != 0)
ce426f
+      break;
ce426f
+
ce426f
+  a = X[ip2] != 0 ? y : x;
ce426f
+
ce426f
+  /* ... and here, at least one of them is still zero.  */
ce426f
+  for (ip = ip2; ip > 0; ip--)
ce426f
+    if (a->d[ip] != 0)
ce426f
+      break;
ce426f
+
ce426f
+  /* The product looks like this for p = 3 (as an example):
ce426f
+
ce426f
+
ce426f
+				a1    a2    a3
ce426f
+		 x		b1    b2    b3
ce426f
+		 -----------------------------
ce426f
+			     a1*b3 a2*b3 a3*b3
ce426f
+		       a1*b2 a2*b2 a3*b2
ce426f
+		 a1*b1 a2*b1 a3*b1
ce426f
+
ce426f
+     So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
ce426f
+     for P >= 3.  We compute the above digits in two parts; the last P-1
ce426f
+     digits and then the first P digits.  The last P-1 digits are a sum of
ce426f
+     products of the input digits from P to P-k where K is 0 for the least
ce426f
+     significant digit and increases as we go towards the left.  The product
ce426f
+     term is of the form X[k]*X[P-k] as can be seen in the above example.
ce426f
+
ce426f
+     The first P digits are also a sum of products with the same product term,
ce426f
+     except that the sum is from 1 to k.  This is also evident from the above
ce426f
+     example.
ce426f
+
ce426f
+     Another thing that becomes evident is that only the most significant
ce426f
+     ip+ip2 digits of the result are non-zero, where ip and ip2 are the
ce426f
+     'internal precision' of the input numbers, i.e. digits after ip and ip2
ce426f
+     are all 0.  */
ce426f
+
ce426f
+  k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
ce426f
+
ce426f
+  while (k > ip + ip2 + 1)
ce426f
+    Z[k--] = 0;
ce426f
+
ce426f
+  zk = 0;
ce426f
+
ce426f
+  /* Precompute sums of diagonal elements so that we can directly use them
ce426f
+     later.  See the next comment to know we why need them.  */
ce426f
+  diag = alloca (k * sizeof (mantissa_store_t));
ce426f
+  mantissa_store_t d = 0;
ce426f
+  for (i = 1; i <= ip; i++)
ce426f
+    {
ce426f
+      d += X[i] * (mantissa_store_t) Y[i];
ce426f
+      diag[i] = d;
ce426f
+    }
ce426f
+  while (i < k)
ce426f
+    diag[i++] = d;
ce426f
 
ce426f
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  Z[0] = -Z[0];  return; }
ce426f
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);                 return; }
ce426f
+  while (k > p2)
ce426f
+    {
ce426f
+      long lim = k / 2;
ce426f
 
ce426f
-  if (X[0] != Y[0])    {
ce426f
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
ce426f
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
ce426f
-  }
ce426f
-  else                       {
ce426f
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
ce426f
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
ce426f
-    else                      Z[0] = ZERO;
ce426f
-  }
ce426f
-}
ce426f
+      if (k % 2 == 0)
ce426f
+	/* We want to add this only once, but since we subtract it in the sum
ce426f
+	   of products above, we add twice.  */
ce426f
+	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
ce426f
 
ce426f
+      for (i = k - p2, j = p2; i < j; i++, j--)
ce426f
+	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
ce426f
 
ce426f
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y      */
ce426f
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is     */
ce426f
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp.   */
ce426f
-/* *x & *y are left unchanged.                                             */
ce426f
+      zk -= diag[k - 1];
ce426f
 
ce426f
-void
ce426f
-SECTION
ce426f
-__mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
+      DIV_RADIX (zk, Z[k]);
ce426f
+      k--;
ce426f
+    }
ce426f
 
ce426f
-  int i, i1, i2, j, k, k2;
ce426f
-  double u;
ce426f
+  /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
ce426f
+     goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
ce426f
+     number of multiplications, we halve the range and if k is an even number,
ce426f
+     add the diagonal element X[k/2]Y[k/2].  Through the half range, we compute
ce426f
+     X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
ce426f
+
ce426f
+     This reduction tells us that we're summing two things, the first term
ce426f
+     through the half range and the negative of the sum of the product of all
ce426f
+     terms of X and Y in the full range.  i.e.
ce426f
+
ce426f
+     SUM(X[i] * Y[i]) for k terms.  This is precalculated above for each k in
ce426f
+     a single loop so that it completes in O(n) time and can hence be directly
ce426f
+     used in the loop below.  */
ce426f
+  while (k > 1)
ce426f
+    {
ce426f
+      long lim = k / 2;
ce426f
+
ce426f
+      if (k % 2 == 0)
ce426f
+	/* We want to add this only once, but since we subtract it in the sum
ce426f
+	   of products above, we add twice.  */
ce426f
+        zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
ce426f
 
ce426f
-		      /* Is z=0? */
ce426f
-  if (X[0]*Y[0]==ZERO)
ce426f
-     { Z[0]=ZERO;  return; }
ce426f
-
ce426f
-		       /* Multiply, add and carry */
ce426f
-  k2 = (p<3) ? p+p : p+3;
ce426f
-  Z[k2]=ZERO;
ce426f
-  for (k=k2; k>1; ) {
ce426f
-    if (k > p)  {i1=k-p; i2=p+1; }
ce426f
-    else        {i1=1;   i2=k;   }
ce426f
-    for (i=i1,j=i2-1; i
ce426f
-
ce426f
-    u = (Z[k] + CUTTER)-CUTTER;
ce426f
-    if  (u > Z[k])  u -= RADIX;
ce426f
-    Z[k]  -= u;
ce426f
-    Z[--k] = u*RADIXI;
ce426f
-  }
ce426f
-
ce426f
-		 /* Is there a carry beyond the most significant digit? */
ce426f
-  if (Z[1] == ZERO) {
ce426f
-    for (i=1; i<=p; i++)  Z[i]=Z[i+1];
ce426f
-    EZ = EX + EY - 1; }
ce426f
-  else
ce426f
-    EZ = EX + EY;
ce426f
+      for (i = 1, j = k - 1; i < j; i++, j--)
ce426f
+	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
ce426f
+
ce426f
+      zk -= diag[k - 1];
ce426f
+
ce426f
+      DIV_RADIX (zk, Z[k]);
ce426f
+      k--;
ce426f
+    }
ce426f
+  Z[k] = zk;
ce426f
+
ce426f
+  /* Get the exponent sum into an intermediate variable.  This is a subtle
ce426f
+     optimization, where given enough registers, all operations on the exponent
ce426f
+     happen in registers and the result is written out only once into EZ.  */
ce426f
+  int e = EX + EY;
ce426f
+
ce426f
+  /* Is there a carry beyond the most significant digit?  */
ce426f
+  if (__glibc_unlikely (Z[1] == 0))
ce426f
+    {
ce426f
+      for (i = 1; i <= p2; i++)
ce426f
+	Z[i] = Z[i + 1];
ce426f
+      e--;
ce426f
+    }
ce426f
 
ce426f
+  EZ = e;
ce426f
   Z[0] = X[0] * Y[0];
ce426f
 }
ce426f
+#endif
ce426f
+
ce426f
+#ifndef NO__SQR
ce426f
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
ce426f
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
ce426f
+   error is bounded by 1.001 ULP.  This is a faster special case of
ce426f
+   multiplication.  */
ce426f
+void
ce426f
+SECTION
ce426f
+__sqr (const mp_no *x, mp_no *y, int p)
ce426f
+{
ce426f
+  long i, j, k, ip;
ce426f
+  mantissa_store_t yk;
ce426f
 
ce426f
+  /* Is z=0?  */
ce426f
+  if (__glibc_unlikely (X[0] == 0))
ce426f
+    {
ce426f
+      Y[0] = 0;
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-/* Invert a multiple precision number. Set *y = 1 / *x.                     */
ce426f
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3,   */
ce426f
-/* 2.001*r**(1-p) for p>3.                                                  */
ce426f
-/* *x=0 is not permissible. *x is left unchanged.                           */
ce426f
+  /* We need not iterate through all X's since it's pointless to
ce426f
+     multiply zeroes.  */
ce426f
+  for (ip = p; ip > 0; ip--)
ce426f
+    if (X[ip] != 0)
ce426f
+      break;
ce426f
 
ce426f
-static
ce426f
-SECTION
ce426f
-void __inv(const mp_no *x, mp_no *y, int p) {
ce426f
-  int i;
ce426f
-#if 0
ce426f
-  int l;
ce426f
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
ce426f
+
ce426f
+  while (k > 2 * ip + 1)
ce426f
+    Y[k--] = 0;
ce426f
+
ce426f
+  yk = 0;
ce426f
+
ce426f
+  while (k > p)
ce426f
+    {
ce426f
+      mantissa_store_t yk2 = 0;
ce426f
+      long lim = k / 2;
ce426f
+
ce426f
+      if (k % 2 == 0)
ce426f
+	yk += X[lim] * (mantissa_store_t) X[lim];
ce426f
+
ce426f
+      /* In __mul, this loop (and the one within the next while loop) run
ce426f
+         between a range to calculate the mantissa as follows:
ce426f
+
ce426f
+         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
ce426f
+		+ X[n] * Y[k]
ce426f
+
ce426f
+         For X == Y, we can get away with summing halfway and doubling the
ce426f
+	 result.  For cases where the range size is even, the mid-point needs
ce426f
+	 to be added separately (above).  */
ce426f
+      for (i = k - p, j = p; i < j; i++, j--)
ce426f
+	yk2 += X[i] * (mantissa_store_t) X[j];
ce426f
+
ce426f
+      yk += 2 * yk2;
ce426f
+
ce426f
+      DIV_RADIX (yk, Y[k]);
ce426f
+      k--;
ce426f
+    }
ce426f
+
ce426f
+  while (k > 1)
ce426f
+    {
ce426f
+      mantissa_store_t yk2 = 0;
ce426f
+      long lim = k / 2;
ce426f
+
ce426f
+      if (k % 2 == 0)
ce426f
+	yk += X[lim] * (mantissa_store_t) X[lim];
ce426f
+
ce426f
+      /* Likewise for this loop.  */
ce426f
+      for (i = 1, j = k - 1; i < j; i++, j--)
ce426f
+	yk2 += X[i] * (mantissa_store_t) X[j];
ce426f
+
ce426f
+      yk += 2 * yk2;
ce426f
+
ce426f
+      DIV_RADIX (yk, Y[k]);
ce426f
+      k--;
ce426f
+    }
ce426f
+  Y[k] = yk;
ce426f
+
ce426f
+  /* Squares are always positive.  */
ce426f
+  Y[0] = 1;
ce426f
+
ce426f
+  /* Get the exponent sum into an intermediate variable.  This is a subtle
ce426f
+     optimization, where given enough registers, all operations on the exponent
ce426f
+     happen in registers and the result is written out only once into EZ.  */
ce426f
+  int e = EX * 2;
ce426f
+
ce426f
+  /* Is there a carry beyond the most significant digit?  */
ce426f
+  if (__glibc_unlikely (Y[1] == 0))
ce426f
+    {
ce426f
+      for (i = 1; i <= p; i++)
ce426f
+	Y[i] = Y[i + 1];
ce426f
+      e--;
ce426f
+    }
ce426f
+
ce426f
+  EY = e;
ce426f
+}
ce426f
 #endif
ce426f
+
ce426f
+/* Invert *X and store in *Y.  Relative error bound:
ce426f
+   - For P = 2: 1.001 * R ^ (1 - P)
ce426f
+   - For P = 3: 1.063 * R ^ (1 - P)
ce426f
+   - For P > 3: 2.001 * R ^ (1 - P)
ce426f
+
ce426f
+   *X = 0 is not permissible.  */
ce426f
+static void
ce426f
+SECTION
ce426f
+__inv (const mp_no *x, mp_no *y, int p)
ce426f
+{
ce426f
+  long i;
ce426f
   double t;
ce426f
-  mp_no z,w;
ce426f
-  static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
ce426f
-			    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
ce426f
-  const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
-			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
-			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
-			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
ce426f
-
ce426f
-  __cpy(x,&z,p);  z.e=0;  __mp_dbl(&z,&t,p);
ce426f
-  t=ONE/t;   __dbl_mp(t,y,p);    EY -= EX;
ce426f
-
ce426f
-  for (i=0; i
ce426f
-    __cpy(y,&w,p);
ce426f
-    __mul(x,&w,y,p);
ce426f
-    __sub(&mptwo,y,&z,p);
ce426f
-    __mul(&w,&z,y,p);
ce426f
-  }
ce426f
+  mp_no z, w;
ce426f
+  static const int np1[] =
ce426f
+    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
ce426f
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
ce426f
+  };
ce426f
+
ce426f
+  __cpy (x, &z, p);
ce426f
+  z.e = 0;
ce426f
+  __mp_dbl (&z, &t, p);
ce426f
+  t = 1 / t;
ce426f
+  __dbl_mp (t, y, p);
ce426f
+  EY -= EX;
ce426f
+
ce426f
+  for (i = 0; i < np1[p]; i++)
ce426f
+    {
ce426f
+      __cpy (y, &w, p);
ce426f
+      __mul (x, &w, y, p);
ce426f
+      __sub (&__mptwo, y, &z, p);
ce426f
+      __mul (&w, &z, y, p);
ce426f
+    }
ce426f
 }
ce426f
 
ce426f
+/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
ce426f
+   or Y and Z.  Relative error bound:
ce426f
+   - For P = 2: 2.001 * R ^ (1 - P)
ce426f
+   - For P = 3: 2.063 * R ^ (1 - P)
ce426f
+   - For P > 3: 3.001 * R ^ (1 - P)
ce426f
 
ce426f
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
ce426f
-/* are left unchanged. x&y may overlap but not x&z or y&z.                   */
ce426f
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3     */
ce426f
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible.                      */
ce426f
-
ce426f
+   *X = 0 is not permissible.  */
ce426f
 void
ce426f
 SECTION
ce426f
-__dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
+__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
+{
ce426f
   mp_no w;
ce426f
 
ce426f
-  if (X[0] == ZERO)    Z[0] = ZERO;
ce426f
-  else                {__inv(y,&w,p);   __mul(x,&w,z,p);}
ce426f
+  if (X[0] == 0)
ce426f
+    Z[0] = 0;
ce426f
+  else
ce426f
+    {
ce426f
+      __inv (y, &w, p);
ce426f
+      __mul (x, &w, z, p);
ce426f
+    }
ce426f
 }
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h
ce426f
@@ -1,7 +1,7 @@
ce426f
 /*
ce426f
  * IBM Accurate Mathematical Library
ce426f
  * Written by International Business Machines Corp.
ce426f
- * Copyright (C) 2001, 2011 Free Software Foundation, Inc.
ce426f
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc.
ce426f
  *
ce426f
  * This program is free software; you can redistribute it and/or modify
ce426f
  * it under the terms of the GNU Lesser General Public License as published by
ce426f
@@ -23,36 +23,58 @@
ce426f
 /*  FUNCTIONS:                                                          */
ce426f
 /*               mcr                                                    */
ce426f
 /*               acr                                                    */
ce426f
-/*               cr                                                     */
ce426f
 /*               cpy                                                    */
ce426f
-/*               cpymn                                                  */
ce426f
 /*               mp_dbl                                                 */
ce426f
 /*               dbl_mp                                                 */
ce426f
 /*               add                                                    */
ce426f
 /*               sub                                                    */
ce426f
 /*               mul                                                    */
ce426f
-/*               inv                                                    */
ce426f
 /*               dvd                                                    */
ce426f
 /*                                                                      */
ce426f
 /* Arithmetic functions for multiple precision numbers.                 */
ce426f
 /* Common types and definition                                          */
ce426f
 /************************************************************************/
ce426f
 
ce426f
+#include <mpa-arch.h>
ce426f
 
ce426f
-typedef struct {/* This structure holds the details of a multi-precision     */
ce426f
-  int e;        /* floating point number, x: d[0] holds its sign (-1,0 or 1) */
ce426f
-  double d[40]; /* e holds its exponent (...,-2,-1,0,1,2,...) and            */
ce426f
-} mp_no;        /* d[1]...d[p] hold its mantissa digits. The value of x is,  */
ce426f
-		/* x = d[1]*r**(e-1) + d[2]*r**(e-2) + ... + d[p]*r**(e-p).  */
ce426f
-		/* Here   r = 2**24,   0 <= d[i] < r  and  1 <= p <= 32.     */
ce426f
-		/* p is a global variable. A multi-precision number is       */
ce426f
-		/* always normalized. Namely, d[1] > 0. An exception is      */
ce426f
-		/* a zero which is characterized by d[0] = 0. The terms      */
ce426f
-		/* d[p+1], d[p+2], ... of a none zero number have no         */
ce426f
-		/* significance and so are the terms e, d[1],d[2],...        */
ce426f
-		/* of a zero.                                                */
ce426f
+/* The mp_no structure holds the details of a multi-precision floating point
ce426f
+   number.
ce426f
 
ce426f
-typedef union { int i[2]; double d; } number;
ce426f
+   - The radix of the number (R) is 2 ^ 24.
ce426f
+
ce426f
+   - E: The exponent of the number.
ce426f
+
ce426f
+   - D[0]: The sign (-1, 1) or 0 if the value is 0.  In the latter case, the
ce426f
+     values of the remaining members of the structure are ignored.
ce426f
+
ce426f
+   - D[1] - D[p]: The mantissa of the number where:
ce426f
+
ce426f
+	0 <= D[i] < R and
ce426f
+	P is the precision of the number and 1 <= p <= 32
ce426f
+
ce426f
+     D[p+1] ... D[39] have no significance.
ce426f
+
ce426f
+   - The value of the number is:
ce426f
+
ce426f
+	D[1] * R ^ (E - 1) + D[2] * R ^ (E - 2) ... D[p] * R ^ (E - p)
ce426f
+
ce426f
+   */
ce426f
+typedef struct
ce426f
+{
ce426f
+  int e;
ce426f
+  mantissa_t d[40];
ce426f
+} mp_no;
ce426f
+
ce426f
+typedef union
ce426f
+{
ce426f
+  int i[2];
ce426f
+  double d;
ce426f
+} number;
ce426f
+
ce426f
+/* TODO: With only a partial backport of the constant cleanup we don't
ce426f
+         define __mpone or __mptwo here for other code to use.  */
ce426f
+/* extern const mp_no __mpone;
ce426f
+extern const mp_no __mptwo;  */
ce426f
 
ce426f
 #define  X   x->d
ce426f
 #define  Y   y->d
ce426f
@@ -63,21 +85,73 @@ typedef union { int i[2]; double d; } nu
ce426f
 
ce426f
 #define ABS(x)   ((x) <  0  ? -(x) : (x))
ce426f
 
ce426f
-int __acr(const mp_no *, const mp_no *, int);
ce426f
-// int  __cr(const mp_no *, const mp_no *, int);
ce426f
-void __cpy(const mp_no *, mp_no *, int);
ce426f
-// void __cpymn(const mp_no *, int, mp_no *, int);
ce426f
-void __mp_dbl(const mp_no *, double *, int);
ce426f
-void __dbl_mp(double, mp_no *, int);
ce426f
-void __add(const mp_no *, const mp_no *, mp_no *, int);
ce426f
-void __sub(const mp_no *, const mp_no *, mp_no *, int);
ce426f
-void __mul(const mp_no *, const mp_no *, mp_no *, int);
ce426f
-// void __inv(const mp_no *, mp_no *, int);
ce426f
-void __dvd(const mp_no *, const mp_no *, mp_no *, int);
ce426f
+#ifndef RADIXI
ce426f
+# define  RADIXI    0x1.0p-24		/* 2^-24   */
ce426f
+#endif
ce426f
+
ce426f
+#ifndef TWO52
ce426f
+# define  TWO52     0x1.0p52		/* 2^52    */
ce426f
+#endif
ce426f
+
ce426f
+#define  TWO5      TWOPOW (5)		/* 2^5     */
ce426f
+#define  TWO8      TWOPOW (8)		/* 2^52    */
ce426f
+#define  TWO10     TWOPOW (10)		/* 2^10    */
ce426f
+#define  TWO18     TWOPOW (18)		/* 2^18    */
ce426f
+#define  TWO19     TWOPOW (19)		/* 2^19    */
ce426f
+#define  TWO23     TWOPOW (23)		/* 2^23    */
ce426f
+
ce426f
+#define  TWO57     0x1.0p57            /* 2^57    */
ce426f
+#define  TWO71     0x1.0p71            /* 2^71    */
ce426f
+#define  TWOM1032  0x1.0p-1032         /* 2^-1032 */
ce426f
+#define  TWOM1022  0x1.0p-1022         /* 2^-1022 */
ce426f
+
ce426f
+#define  HALF      0x1.0p-1            /* 1/2 */
ce426f
+#define  MHALF     -0x1.0p-1           /* -1/2 */
ce426f
+#define  HALFRAD   0x1.0p23            /* 2^23 */
ce426f
+
ce426f
+int __acr (const mp_no *, const mp_no *, int);
ce426f
+void __cpy (const mp_no *, mp_no *, int);
ce426f
+void __mp_dbl (const mp_no *, double *, int);
ce426f
+void __dbl_mp (double, mp_no *, int);
ce426f
+void __add (const mp_no *, const mp_no *, mp_no *, int);
ce426f
+void __sub (const mp_no *, const mp_no *, mp_no *, int);
ce426f
+void __mul (const mp_no *, const mp_no *, mp_no *, int);
ce426f
+void __sqr (const mp_no *, mp_no *, int);
ce426f
+void __dvd (const mp_no *, const mp_no *, mp_no *, int);
ce426f
 
ce426f
 extern void __mpatan (mp_no *, mp_no *, int);
ce426f
 extern void __mpatan2 (mp_no *, mp_no *, mp_no *, int);
ce426f
 extern void __mpsqrt (mp_no *, mp_no *, int);
ce426f
-extern void __mpexp (mp_no *, mp_no *__y, int);
ce426f
+extern void __mpexp (mp_no *, mp_no *, int);
ce426f
 extern void __c32 (mp_no *, mp_no *, mp_no *, int);
ce426f
 extern int __mpranred (double, mp_no *, int);
ce426f
+
ce426f
+/* Given a power POW, build a multiprecision number 2^POW.  */
ce426f
+static inline void
ce426f
+__pow_mp (int pow, mp_no *y, int p)
ce426f
+{
ce426f
+  int i, rem;
ce426f
+
ce426f
+  /* The exponent is E such that E is a factor of 2^24.  The remainder (of the
ce426f
+     form 2^x) goes entirely into the first digit of the mantissa as it is
ce426f
+     always less than 2^24.  */
ce426f
+  EY = pow / 24;
ce426f
+  rem = pow - EY * 24;
ce426f
+  EY++;
ce426f
+
ce426f
+  /* If the remainder is negative, it means that POW was negative since
ce426f
+     |EY * 24| <= |pow|.  Adjust so that REM is positive and still less than
ce426f
+     24 because of which, the mantissa digit is less than 2^24.  */
ce426f
+  if (rem < 0)
ce426f
+    {
ce426f
+      EY--;
ce426f
+      rem += 24;
ce426f
+    }
ce426f
+  /* The sign of any 2^x is always positive.  */
ce426f
+  Y[0] = 1;
ce426f
+  Y[1] = 1 << rem;
ce426f
+
ce426f
+  /* Everything else is 0.  */
ce426f
+  for (i = 2; i <= p; i++)
ce426f
+    Y[i] = 0;
ce426f
+}
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c
ce426f
@@ -69,13 +69,13 @@ __mpexp(mp_no *x, mp_no *y, int p) {
ce426f
   for (i=0; i
ce426f
   for (   ; i>EX; i--)  a *= RADIX;
ce426f
   b = X[1]*RADIXI;   m2 = 24*EX;
ce426f
-  for (; b
ce426f
+  for (; b
ce426f
   if (b == HALF) {
ce426f
-    for (i=2; i<=p; i++) { if (X[i]!=ZERO)  break; }
ce426f
-    if (i==p+1)  { m2--;  a *= TWO; }
ce426f
+    for (i=2; i<=p; i++) { if (X[i]!=0)  break; }
ce426f
+    if (i==p+1)  { m2--;  a *= 2; }
ce426f
   }
ce426f
   if ((m=m1+m2) <= 0) {
ce426f
-    m=0;  a=ONE;
ce426f
+    m=0;  a=1;
ce426f
     for (i=n-1; i>0; i--,n--) { if (m1np[i][p]+m2>0)  break; }
ce426f
   }
ce426f
 
ce426f
@@ -84,8 +84,8 @@ __mpexp(mp_no *x, mp_no *y, int p) {
ce426f
   __mul(x,&mpt1,&mps,p);
ce426f
 
ce426f
   /* Evaluate the polynomial. Put result in mpt2 */
ce426f
-  mpone.e=1;   mpone.d[0]=ONE;   mpone.d[1]=ONE;
ce426f
-  mpk.e = 1;   mpk.d[0] = ONE;   mpk.d[1]=__mpexp_nn[n].d;
ce426f
+  mpone.e=1;   mpone.d[0]=1;   mpone.d[1]=1;
ce426f
+  mpk.e = 1;   mpk.d[0] = 1;   mpk.d[1]=__mpexp_nn[n].d;
ce426f
   __dvd(&mps,&mpk,&mpt1,p);
ce426f
   __add(&mpone,&mpt1,&mpak,p);
ce426f
   for (k=n-1; k>1; k--) {
ce426f
@@ -99,9 +99,9 @@ __mpexp(mp_no *x, mp_no *y, int p) {
ce426f
 
ce426f
   /* Raise polynomial value to the power of 2**m. Put result in y */
ce426f
   for (k=0,j=0; k
ce426f
-    __mul(&mpt2,&mpt2,&mpt1,p);  k++;
ce426f
+    __sqr (&mpt2, &mpt1, p);  k++;
ce426f
     if (k==m)  { j=1;  break; }
ce426f
-    __mul(&mpt1,&mpt1,&mpt2,p);  k++;
ce426f
+    __sqr (&mpt1, &mpt2, p);  k++;
ce426f
   }
ce426f
   if (j)  __cpy(&mpt1,y,p);
ce426f
   else    __cpy(&mpt2,y,p);
ce426f
Index: glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
ce426f
@@ -1,5 +1,6 @@
ce426f
 #define __add __add_avx
ce426f
 #define __mul __mul_avx
ce426f
+#define __sqr __sqr_avx
ce426f
 #define __sub __sub_avx
ce426f
 #define __dbl_mp __dbl_mp_avx
ce426f
 #define __dvd __dvd_avx
ce426f
Index: glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
ce426f
@@ -1,5 +1,6 @@
ce426f
 #define __add __add_fma4
ce426f
 #define __mul __mul_fma4
ce426f
+#define __sqr __sqr_fma4
ce426f
 #define __sub __sub_fma4
ce426f
 #define __dbl_mp __dbl_mp_fma4
ce426f
 #define __dvd __dvd_fma4
ce426f
Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/powerpc/power4/fpu/mpa.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa.c
ce426f
@@ -2,7 +2,7 @@
ce426f
 /*
ce426f
  * IBM Accurate Mathematical Library
ce426f
  * written by International Business Machines Corp.
ce426f
- * Copyright (C) 2001, 2006 Free Software Foundation
ce426f
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc.
ce426f
  *
ce426f
  * This program is free software; you can redistribute it and/or modify
ce426f
  * it under the terms of the GNU Lesser General Public License as published by
ce426f
@@ -17,532 +17,198 @@
ce426f
  * You should have received a copy of the GNU Lesser General Public License
ce426f
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
ce426f
  */
ce426f
-/************************************************************************/
ce426f
-/*  MODULE_NAME: mpa.c                                                  */
ce426f
-/*                                                                      */
ce426f
-/*  FUNCTIONS:                                                          */
ce426f
-/*               mcr                                                    */
ce426f
-/*               acr                                                    */
ce426f
-/*               cr                                                     */
ce426f
-/*               cpy                                                    */
ce426f
-/*               cpymn                                                  */
ce426f
-/*               norm                                                   */
ce426f
-/*               denorm                                                 */
ce426f
-/*               mp_dbl                                                 */
ce426f
-/*               dbl_mp                                                 */
ce426f
-/*               add_magnitudes                                         */
ce426f
-/*               sub_magnitudes                                         */
ce426f
-/*               add                                                    */
ce426f
-/*               sub                                                    */
ce426f
-/*               mul                                                    */
ce426f
-/*               inv                                                    */
ce426f
-/*               dvd                                                    */
ce426f
-/*                                                                      */
ce426f
-/* Arithmetic functions for multiple precision numbers.                 */
ce426f
-/* Relative errors are bounded                                          */
ce426f
-/************************************************************************/
ce426f
-
ce426f
-
ce426f
-#include "endian.h"
ce426f
-#include "mpa.h"
ce426f
-#include "mpa2.h"
ce426f
-#include <sys/param.h>	/* For MIN() */
ce426f
-/* mcr() compares the sizes of the mantissas of two multiple precision  */
ce426f
-/* numbers. Mantissas are compared regardless of the signs of the       */
ce426f
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also     */
ce426f
-/* disregarded.                                                         */
ce426f
-static int mcr(const mp_no *x, const mp_no *y, int p) {
ce426f
-  long i;
ce426f
-  long p2 = p;
ce426f
-  for (i=1; i<=p2; i++) {
ce426f
-    if      (X[i] == Y[i])  continue;
ce426f
-    else if (X[i] >  Y[i])  return  1;
ce426f
-    else                    return -1; }
ce426f
-  return 0;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-
ce426f
-/* acr() compares the absolute values of two multiple precision numbers */
ce426f
-int __acr(const mp_no *x, const mp_no *y, int p) {
ce426f
-  long i;
ce426f
-
ce426f
-  if      (X[0] == ZERO) {
ce426f
-    if    (Y[0] == ZERO) i= 0;
ce426f
-    else                 i=-1;
ce426f
-  }
ce426f
-  else if (Y[0] == ZERO) i= 1;
ce426f
-  else {
ce426f
-    if      (EX >  EY)   i= 1;
ce426f
-    else if (EX <  EY)   i=-1;
ce426f
-    else                 i= mcr(x,y,p);
ce426f
-  }
ce426f
-
ce426f
-  return i;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* cr90 compares the values of two multiple precision numbers           */
ce426f
-int  __cr(const mp_no *x, const mp_no *y, int p) {
ce426f
-  int i;
ce426f
-
ce426f
-  if      (X[0] > Y[0])  i= 1;
ce426f
-  else if (X[0] < Y[0])  i=-1;
ce426f
-  else if (X[0] < ZERO ) i= __acr(y,x,p);
ce426f
-  else                   i= __acr(x,y,p);
ce426f
-
ce426f
-  return i;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible.      */
ce426f
-void __cpy(const mp_no *x, mp_no *y, int p) {
ce426f
-  long i;
ce426f
-
ce426f
-  EY = EX;
ce426f
-  for (i=0; i <= p; i++)    Y[i] = X[i];
ce426f
-
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Copy a multiple precision number x of precision m into a */
ce426f
-/* multiple precision number y of precision n. In case n>m, */
ce426f
-/* the digits of y beyond the m'th are set to zero. In case */
ce426f
-/* n
ce426f
-/* x=y is permissible.                                      */
ce426f
-
ce426f
-void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
ce426f
-
ce426f
-  long i,k;
ce426f
-  long n2 = n;
ce426f
-  long m2 = m;
ce426f
-
ce426f
-  EY = EX;     k=MIN(m2,n2);
ce426f
-  for (i=0; i <= k; i++)    Y[i] = X[i];
ce426f
-  for (   ; i <= n2; i++)    Y[i] = ZERO;
ce426f
-
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-/* Convert a multiple precision number *x into a double precision */
ce426f
-/* number *y, normalized case  (|x| >= 2**(-1022))) */
ce426f
-static void norm(const mp_no *x, double *y, int p)
ce426f
-{
ce426f
-  #define R  radixi.d
ce426f
-  long i;
ce426f
-#if 0
ce426f
-  int k;
ce426f
-#endif
ce426f
-  double a,c,u,v,z[5];
ce426f
-  if (p<5) {
ce426f
-    if      (p==1) c = X[1];
ce426f
-    else if (p==2) c = X[1] + R* X[2];
ce426f
-    else if (p==3) c = X[1] + R*(X[2]  +   R* X[3]);
ce426f
-    else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
ce426f
-  }
ce426f
-  else {
ce426f
-    for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
ce426f
-        {a *= TWO;   z[1] *= TWO; }
ce426f
-
ce426f
-    for (i=2; i<5; i++) {
ce426f
-      z[i] = X[i]*a;
ce426f
-      u = (z[i] + CUTTER)-CUTTER;
ce426f
-      if  (u > z[i])  u -= RADIX;
ce426f
-      z[i] -= u;
ce426f
-      z[i-1] += u*RADIXI;
ce426f
-    }
ce426f
-
ce426f
-    u = (z[3] + TWO71) - TWO71;
ce426f
-    if (u > z[3])   u -= TWO19;
ce426f
-    v = z[3]-u;
ce426f
-
ce426f
-    if (v == TWO18) {
ce426f
-      if (z[4] == ZERO) {
ce426f
-        for (i=5; i <= p; i++) {
ce426f
-          if (X[i] == ZERO)   continue;
ce426f
-          else                {z[3] += ONE;   break; }
ce426f
-        }
ce426f
-      }
ce426f
-      else              z[3] += ONE;
ce426f
-    }
ce426f
-
ce426f
-    c = (z[1] + R *(z[2] + R * z[3]))/a;
ce426f
-  }
ce426f
 
ce426f
-  c *= X[0];
ce426f
-
ce426f
-  for (i=1; i
ce426f
-  for (i=1; i>EX; i--)   c *= RADIXI;
ce426f
-
ce426f
-  *y = c;
ce426f
-  return;
ce426f
-#undef R
ce426f
-}
ce426f
-
ce426f
-/* Convert a multiple precision number *x into a double precision */
ce426f
-/* number *y, denormalized case  (|x| < 2**(-1022))) */
ce426f
-static void denorm(const mp_no *x, double *y, int p)
ce426f
+/* Define __mul and __sqr and use the rest from generic code.  */
ce426f
+#define NO__MUL
ce426f
+#define NO__SQR
ce426f
+
ce426f
+#include <sysdeps/ieee754/dbl-64/mpa.c>
ce426f
+
ce426f
+/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
ce426f
+   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
ce426f
+   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
ce426f
+void
ce426f
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
ce426f
 {
ce426f
-  long i,k;
ce426f
+  long i, i1, i2, j, k, k2;
ce426f
   long p2 = p;
ce426f
-  double c,u,z[5];
ce426f
-#if 0
ce426f
-  double a,v;
ce426f
-#endif
ce426f
+  double u, zk, zk2;
ce426f
 
ce426f
-#define R  radixi.d
ce426f
-  if (EX<-44 || (EX==-44 && X[1]
ce426f
-     { *y=ZERO; return; }
ce426f
-
ce426f
-  if      (p2==1) {
ce426f
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=ZERO;  z[3]=ZERO;  k=3;}
ce426f
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=ZERO;  k=2;}
ce426f
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
ce426f
-  }
ce426f
-  else if (p2==2) {
ce426f
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  z[3]=ZERO;  k=3;}
ce426f
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=X[2];  k=2;}
ce426f
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
ce426f
-  }
ce426f
-  else {
ce426f
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  k=3;}
ce426f
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  k=2;}
ce426f
-    else              {z[1]=     TWO10;  z[2]=ZERO;  k=1;}
ce426f
-    z[3] = X[k];
ce426f
-  }
ce426f
-
ce426f
-  u = (z[3] + TWO57) - TWO57;
ce426f
-  if  (u > z[3])   u -= TWO5;
ce426f
-
ce426f
-  if (u==z[3]) {
ce426f
-    for (i=k+1; i <= p2; i++) {
ce426f
-      if (X[i] == ZERO)   continue;
ce426f
-      else {z[3] += ONE;   break; }
ce426f
+  /* Is z=0?  */
ce426f
+  if (__glibc_unlikely (X[0] * Y[0] == 0))
ce426f
+    {
ce426f
+      Z[0] = 0;
ce426f
+      return;
ce426f
     }
ce426f
-  }
ce426f
-
ce426f
-  c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
ce426f
 
ce426f
-  *y = c*TWOM1032;
ce426f
-  return;
ce426f
-
ce426f
-#undef R
ce426f
-}
ce426f
-
ce426f
-/* Convert a multiple precision number *x into a double precision number *y. */
ce426f
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */
ce426f
-
ce426f
-void __mp_dbl(const mp_no *x, double *y, int p) {
ce426f
-#if 0
ce426f
-  int i,k;
ce426f
-  double a,c,u,v,z[5];
ce426f
+  /* Multiply, add and carry */
ce426f
+  k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
ce426f
+  zk = Z[k2] = 0;
ce426f
+  for (k = k2; k > 1;)
ce426f
+    {
ce426f
+      if (k > p2)
ce426f
+	{
ce426f
+	  i1 = k - p2;
ce426f
+	  i2 = p2 + 1;
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  i1 = 1;
ce426f
+	  i2 = k;
ce426f
+	}
ce426f
+#if 1
ce426f
+      /* Rearrange this inner loop to allow the fmadd instructions to be
ce426f
+         independent and execute in parallel on processors that have
ce426f
+         dual symmetrical FP pipelines.  */
ce426f
+      if (i1 < (i2 - 1))
ce426f
+	{
ce426f
+	  /* Make sure we have at least 2 iterations.  */
ce426f
+	  if (((i2 - i1) & 1L) == 1L)
ce426f
+	    {
ce426f
+	      /* Handle the odd iterations case.  */
ce426f
+	      zk2 = x->d[i2 - 1] * y->d[i1];
ce426f
+	    }
ce426f
+	  else
ce426f
+	    zk2 = 0.0;
ce426f
+	  /* Do two multiply/adds per loop iteration, using independent
ce426f
+	     accumulators; zk and zk2.  */
ce426f
+	  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
ce426f
+	    {
ce426f
+	      zk += x->d[i] * y->d[j];
ce426f
+	      zk2 += x->d[i + 1] * y->d[j - 1];
ce426f
+	    }
ce426f
+	  zk += zk2;		/* Final sum.  */
ce426f
+	}
ce426f
+      else
ce426f
+	{
ce426f
+	  /* Special case when iterations is 1.  */
ce426f
+	  zk += x->d[i1] * y->d[i1];
ce426f
+	}
ce426f
+#else
ce426f
+      /* The original code.  */
ce426f
+      for (i = i1, j = i2 - 1; i < i2; i++, j--)
ce426f
+	zk += X[i] * Y[j];
ce426f
 #endif
ce426f
 
ce426f
-  if (X[0] == ZERO)  {*y = ZERO;  return; }
ce426f
-
ce426f
-  if      (EX> -42)                 norm(x,y,p);
ce426f
-  else if (EX==-42 && X[1]>=TWO10)  norm(x,y,p);
ce426f
-  else                              denorm(x,y,p);
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* dbl_mp() converts a double precision number x into a multiple precision  */
ce426f
-/* number *y. If the precision p is too small the result is truncated. x is */
ce426f
-/* left unchanged.                                                          */
ce426f
-
ce426f
-void __dbl_mp(double x, mp_no *y, int p) {
ce426f
+      u = (zk + CUTTER) - CUTTER;
ce426f
+      if (u > zk)
ce426f
+	u -= RADIX;
ce426f
+      Z[k] = zk - u;
ce426f
+      zk = u * RADIXI;
ce426f
+      --k;
ce426f
+    }
ce426f
+  Z[k] = zk;
ce426f
 
ce426f
-  long i,n;
ce426f
-  long p2 = p;
ce426f
-  double u;
ce426f
+  int e = EX + EY;
ce426f
+  /* Is there a carry beyond the most significant digit?  */
ce426f
+  if (Z[1] == 0)
ce426f
+    {
ce426f
+      for (i = 1; i <= p2; i++)
ce426f
+	Z[i] = Z[i + 1];
ce426f
+      e--;
ce426f
+    }
ce426f
 
ce426f
-  /* Sign */
ce426f
-  if      (x == ZERO)  {Y[0] = ZERO;  return; }
ce426f
-  else if (x >  ZERO)   Y[0] = ONE;
ce426f
-  else                 {Y[0] = MONE;  x=-x;   }
ce426f
-
ce426f
-  /* Exponent */
ce426f
-  for (EY=ONE; x >= RADIX; EY += ONE)   x *= RADIXI;
ce426f
-  for (      ; x <  ONE;   EY -= ONE)   x *= RADIX;
ce426f
-
ce426f
-  /* Digits */
ce426f
-  n=MIN(p2,4);
ce426f
-  for (i=1; i<=n; i++) {
ce426f
-    u = (x + TWO52) - TWO52;
ce426f
-    if (u>x)   u -= ONE;
ce426f
-    Y[i] = u;     x -= u;    x *= RADIX; }
ce426f
-  for (   ; i<=p2; i++)     Y[i] = ZERO;
ce426f
-  return;
ce426f
+  EZ = e;
ce426f
+  Z[0] = X[0] * Y[0];
ce426f
 }
ce426f
 
ce426f
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
ce426f
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
ce426f
+   error is bounded by 1.001 ULP.  This is a faster special case of
ce426f
+   multiplication.  */
ce426f
+void
ce426f
+__sqr (const mp_no *x, mp_no *y, int p)
ce426f
+{
ce426f
+  long i, j, k, ip;
ce426f
+  double u, yk;
ce426f
 
ce426f
-/*  add_magnitudes() adds the magnitudes of *x & *y assuming that           */
ce426f
-/*  abs(*x) >= abs(*y) > 0.                                                 */
ce426f
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
ce426f
-/* No guard digit is used. The result equals the exact sum, truncated.      */
ce426f
-/* *x & *y are left unchanged.                                              */
ce426f
+  /* Is z=0?  */
ce426f
+  if (__glibc_unlikely (X[0] == 0))
ce426f
+    {
ce426f
+      Y[0] = 0;
ce426f
+      return;
ce426f
+    }
ce426f
 
ce426f
-static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
+  /* We need not iterate through all X's since it's pointless to
ce426f
+     multiply zeroes.  */
ce426f
+  for (ip = p; ip > 0; ip--)
ce426f
+    if (X[ip] != 0)
ce426f
+      break;
ce426f
 
ce426f
-  long i,j,k;
ce426f
-  long p2 = p;
ce426f
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
ce426f
 
ce426f
-  EZ = EX;
ce426f
+  while (k > 2 * ip + 1)
ce426f
+    Y[k--] = 0;
ce426f
 
ce426f
-  i=p2;    j=p2+ EY - EX;    k=p2+1;
ce426f
+  yk = 0;
ce426f
 
ce426f
-  if (j<1)
ce426f
-     {__cpy(x,z,p);  return; }
ce426f
-  else   Z[k] = ZERO;
ce426f
-
ce426f
-  for (; j>0; i--,j--) {
ce426f
-    Z[k] += X[i] + Y[j];
ce426f
-    if (Z[k] >= RADIX) {
ce426f
-      Z[k]  -= RADIX;
ce426f
-      Z[--k] = ONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  for (; i>0; i--) {
ce426f
-    Z[k] += X[i];
ce426f
-    if (Z[k] >= RADIX) {
ce426f
-      Z[k]  -= RADIX;
ce426f
-      Z[--k] = ONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  if (Z[1] == ZERO) {
ce426f
-    for (i=1; i<=p2; i++)    Z[i] = Z[i+1]; }
ce426f
-  else   EZ += ONE;
ce426f
-}
ce426f
+  while (k > p)
ce426f
+    {
ce426f
+      double yk2 = 0.0;
ce426f
+      long lim = k / 2;
ce426f
 
ce426f
+      if (k % 2 == 0)
ce426f
+        {
ce426f
+	  yk += X[lim] * X[lim];
ce426f
+	  lim--;
ce426f
+	}
ce426f
 
ce426f
-/*  sub_magnitudes() subtracts the magnitudes of *x & *y assuming that      */
ce426f
-/*  abs(*x) > abs(*y) > 0.                                                  */
ce426f
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z  */
ce426f
-/* or y&z. One guard digit is used. The error is less than one ulp.         */
ce426f
-/* *x & *y are left unchanged.                                              */
ce426f
+      /* In __mul, this loop (and the one within the next while loop) run
ce426f
+         between a range to calculate the mantissa as follows:
ce426f
 
ce426f
-static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
+         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
ce426f
+		+ X[n] * Y[k]
ce426f
 
ce426f
-  long i,j,k;
ce426f
-  long p2 = p;
ce426f
+         For X == Y, we can get away with summing halfway and doubling the
ce426f
+	 result.  For cases where the range size is even, the mid-point needs
ce426f
+	 to be added separately (above).  */
ce426f
+      for (i = k - p, j = p; i <= lim; i++, j--)
ce426f
+	yk2 += X[i] * X[j];
ce426f
 
ce426f
-  EZ = EX;
ce426f
+      yk += 2.0 * yk2;
ce426f
 
ce426f
-  if (EX == EY) {
ce426f
-    i=j=k=p2;
ce426f
-    Z[k] = Z[k+1] = ZERO; }
ce426f
-  else {
ce426f
-    j= EX - EY;
ce426f
-    if (j > p2)  {__cpy(x,z,p);  return; }
ce426f
-    else {
ce426f
-      i=p2;   j=p2+1-j;   k=p2;
ce426f
-      if (Y[j] > ZERO) {
ce426f
-        Z[k+1] = RADIX - Y[j--];
ce426f
-        Z[k]   = MONE; }
ce426f
-      else {
ce426f
-        Z[k+1] = ZERO;
ce426f
-        Z[k]   = ZERO;   j--;}
ce426f
+      u = (yk + CUTTER) - CUTTER;
ce426f
+      if (u > yk)
ce426f
+	u -= RADIX;
ce426f
+      Y[k--] = yk - u;
ce426f
+      yk = u * RADIXI;
ce426f
     }
ce426f
-  }
ce426f
-
ce426f
-  for (; j>0; i--,j--) {
ce426f
-    Z[k] += (X[i] - Y[j]);
ce426f
-    if (Z[k] < ZERO) {
ce426f
-      Z[k]  += RADIX;
ce426f
-      Z[--k] = MONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  for (; i>0; i--) {
ce426f
-    Z[k] += X[i];
ce426f
-    if (Z[k] < ZERO) {
ce426f
-      Z[k]  += RADIX;
ce426f
-      Z[--k] = MONE; }
ce426f
-    else
ce426f
-      Z[--k] = ZERO;
ce426f
-  }
ce426f
-
ce426f
-  for (i=1; Z[i] == ZERO; i++) ;
ce426f
-  EZ = EZ - i + 1;
ce426f
-  for (k=1; i <= p2+1; )
ce426f
-    Z[k++] = Z[i++];
ce426f
-  for (; k <= p2; )
ce426f
-    Z[k++] = ZERO;
ce426f
-
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap  */
ce426f
-/* but not x&z or y&z. One guard digit is used. The error is less than    */
ce426f
-/* one ulp. *x & *y are left unchanged.                                   */
ce426f
-
ce426f
-void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
-  int n;
ce426f
-
ce426f
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  return; }
ce426f
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);  return; }
ce426f
-
ce426f
-  if (X[0] == Y[0])   {
ce426f
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] = X[0]; }
ce426f
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
ce426f
-  }
ce426f
-  else                       {
ce426f
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] = X[0]; }
ce426f
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
ce426f
-    else                      Z[0] = ZERO;
ce426f
-  }
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
ce426f
-/* overlap but not x&z or y&z. One guard digit is used. The error is      */
ce426f
-/* less than one ulp. *x & *y are left unchanged.                         */
ce426f
-
ce426f
-void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
-  int n;
ce426f
-
ce426f
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  Z[0] = -Z[0];  return; }
ce426f
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);                 return; }
ce426f
-
ce426f
-  if (X[0] != Y[0])    {
ce426f
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
ce426f
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
ce426f
-  }
ce426f
-  else                       {
ce426f
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
ce426f
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
ce426f
-    else                      Z[0] = ZERO;
ce426f
-  }
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y      */
ce426f
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is     */
ce426f
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp.   */
ce426f
-/* *x & *y are left unchanged.                                             */
ce426f
 
ce426f
-void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
-  long i, i1, i2, j, k, k2;
ce426f
-  long p2 = p;
ce426f
-  double u, zk, zk2;
ce426f
-
ce426f
-                      /* Is z=0? */
ce426f
-  if (X[0]*Y[0]==ZERO)
ce426f
-     { Z[0]=ZERO;  return; }
ce426f
-
ce426f
-                       /* Multiply, add and carry */
ce426f
-  k2 = (p2<3) ? p2+p2 : p2+3;
ce426f
-  zk = Z[k2]=ZERO;
ce426f
-  for (k=k2; k>1; ) {
ce426f
-    if (k > p2)  {i1=k-p2; i2=p2+1; }
ce426f
-    else        {i1=1;   i2=k;   }
ce426f
-#if 1
ce426f
-    /* rearange this inner loop to allow the fmadd instructions to be
ce426f
-       independent and execute in parallel on processors that have
ce426f
-       dual symetrical FP pipelines.  */
ce426f
-    if (i1 < (i2-1))
ce426f
+  while (k > 1)
ce426f
     {
ce426f
-	/* make sure we have at least 2 iterations */
ce426f
-	if (((i2 - i1) & 1L) == 1L)
ce426f
-	{
ce426f
-                /* Handle the odd iterations case.  */
ce426f
-		zk2 = x->d[i2-1]*y->d[i1];
ce426f
-	}
ce426f
-	else
ce426f
-		zk2 = zero.d;
ce426f
-	/* Do two multiply/adds per loop iteration, using independent
ce426f
-           accumulators; zk and zk2.  */
ce426f
-	for (i=i1,j=i2-1; i
ce426f
-	{
ce426f
-		zk += x->d[i]*y->d[j];
ce426f
-		zk2 += x->d[i+1]*y->d[j-1];
ce426f
+      double yk2 = 0.0;
ce426f
+      long lim = k / 2;
ce426f
+
ce426f
+      if (k % 2 == 0)
ce426f
+        {
ce426f
+	  yk += X[lim] * X[lim];
ce426f
+	  lim--;
ce426f
 	}
ce426f
-	zk += zk2; /* final sum.  */
ce426f
-    }
ce426f
-    else
ce426f
+
ce426f
+      /* Likewise for this loop.  */
ce426f
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
ce426f
+	yk2 += X[i] * X[j];
ce426f
+
ce426f
+      yk += 2.0 * yk2;
ce426f
+
ce426f
+      u = (yk + CUTTER) - CUTTER;
ce426f
+      if (u > yk)
ce426f
+	u -= RADIX;
ce426f
+      Y[k--] = yk - u;
ce426f
+      yk = u * RADIXI;
ce426f
+    }
ce426f
+  Y[k] = yk;
ce426f
+
ce426f
+  /* Squares are always positive.  */
ce426f
+  Y[0] = 1.0;
ce426f
+
ce426f
+  int e = EX * 2;
ce426f
+  /* Is there a carry beyond the most significant digit?  */
ce426f
+  if (__glibc_unlikely (Y[1] == 0))
ce426f
     {
ce426f
-        /* Special case when iterations is 1.  */
ce426f
-	zk += x->d[i1]*y->d[i1];
ce426f
+      for (i = 1; i <= p; i++)
ce426f
+	Y[i] = Y[i + 1];
ce426f
+      e--;
ce426f
     }
ce426f
-#else
ce426f
-    /* The orginal code.  */
ce426f
-    for (i=i1,j=i2-1; i
ce426f
-#endif
ce426f
-
ce426f
-    u = (zk + CUTTER)-CUTTER;
ce426f
-    if  (u > zk)  u -= RADIX;
ce426f
-    Z[k]  = zk - u;
ce426f
-    zk = u*RADIXI;
ce426f
-    --k;
ce426f
-  }
ce426f
-  Z[k] = zk;
ce426f
-
ce426f
-                 /* Is there a carry beyond the most significant digit? */
ce426f
-  if (Z[1] == ZERO) {
ce426f
-    for (i=1; i<=p2; i++)  Z[i]=Z[i+1];
ce426f
-    EZ = EX + EY - 1; }
ce426f
-  else
ce426f
-    EZ = EX + EY;
ce426f
-
ce426f
-  Z[0] = X[0] * Y[0];
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Invert a multiple precision number. Set *y = 1 / *x.                     */
ce426f
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3,   */
ce426f
-/* 2.001*r**(1-p) for p>3.                                                  */
ce426f
-/* *x=0 is not permissible. *x is left unchanged.                           */
ce426f
-
ce426f
-void __inv(const mp_no *x, mp_no *y, int p) {
ce426f
-  long i;
ce426f
-#if 0
ce426f
-  int l;
ce426f
-#endif
ce426f
-  double t;
ce426f
-  mp_no z,w;
ce426f
-  static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
ce426f
-                            4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
ce426f
-  const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
ce426f
-
ce426f
-  __cpy(x,&z,p);  z.e=0;  __mp_dbl(&z,&t,p);
ce426f
-  t=ONE/t;   __dbl_mp(t,y,p);    EY -= EX;
ce426f
-
ce426f
-  for (i=0; i
ce426f
-    __cpy(y,&w,p);
ce426f
-    __mul(x,&w,y,p);
ce426f
-    __sub(&mptwo,y,&z,p);
ce426f
-    __mul(&w,&z,y,p);
ce426f
-  }
ce426f
-  return;
ce426f
-}
ce426f
-
ce426f
-
ce426f
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
ce426f
-/* are left unchanged. x&y may overlap but not x&z or y&z.                   */
ce426f
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3     */
ce426f
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible.                      */
ce426f
-
ce426f
-void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
ce426f
-
ce426f
-  mp_no w;
ce426f
-
ce426f
-  if (X[0] == ZERO)    Z[0] = ZERO;
ce426f
-  else                {__inv(y,&w,p);   __mul(x,&w,z,p);}
ce426f
-  return;
ce426f
+  EY = e;
ce426f
 }
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h
ce426f
@@ -138,8 +138,6 @@
ce426f
 #endif
ce426f
 #endif
ce426f
 
ce426f
-#define  ZERO      zero.d
ce426f
-#define  ONE       one.d
ce426f
 #define  A         a.d
ce426f
 #define  B         b.d
ce426f
 #define  C         c.d
ce426f
@@ -160,7 +158,5 @@
ce426f
 #define  U6        u6.d
ce426f
 #define  U7        u7.d
ce426f
 #define  U8        u8.d
ce426f
-#define  TWO8      two8.d
ce426f
-#define  TWO52     two52.d
ce426f
 
ce426f
 #endif
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat2.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h
ce426f
@@ -174,11 +174,4 @@
ce426f
 #endif
ce426f
 #endif
ce426f
 
ce426f
-#define  ZERO      zero.d
ce426f
-#define  MZERO     mzero.d
ce426f
-#define  ONE       one.d
ce426f
-#define  TWO8      two8.d
ce426f
-#define  TWO52     two52.d
ce426f
-#define  TWOM1022  twom1022.d
ce426f
-
ce426f
 #endif
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa2.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa2.h
ce426f
+++ /dev/null
ce426f
@@ -1,94 +0,0 @@
ce426f
-
ce426f
-/*
ce426f
- * IBM Accurate Mathematical Library
ce426f
- * Written by International Business Machines Corp.
ce426f
- * Copyright (C) 2001 Free Software Foundation, Inc.
ce426f
- *
ce426f
- * This program is free software; you can redistribute it and/or modify
ce426f
- * it under the terms of the GNU Lesser General Public License as published by
ce426f
- * the Free Software Foundation; either version 2.1 of the License, or
ce426f
- * (at your option) any later version.
ce426f
- *
ce426f
- * This program is distributed in the hope that it will be useful,
ce426f
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
ce426f
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
ce426f
- * GNU Lesser General Public License for more details.
ce426f
- *
ce426f
- * You should have received a copy of the GNU Lesser General Public License
ce426f
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
ce426f
- */
ce426f
-
ce426f
-/**************************************************************************/
ce426f
-/*                                                                        */
ce426f
-/* MODULE_NAME:mpa2.h                                                     */
ce426f
-/*                                                                        */
ce426f
-/*                                                                        */
ce426f
-/*   variables prototype and definition   according to type of processor  */
ce426f
-/*   types definition                                                     */
ce426f
-/**************************************************************************/
ce426f
-
ce426f
-#ifndef MPA2_H
ce426f
-#define MPA2_H
ce426f
-
ce426f
-
ce426f
-#ifdef BIG_ENDI
ce426f
-static const number
ce426f
-/**/ radix          = {{0x41700000, 0x00000000} }, /* 2**24  */
ce426f
-/**/ radixi         = {{0x3e700000, 0x00000000} }, /* 2**-24 */
ce426f
-/**/ cutter         = {{0x44b00000, 0x00000000} }, /* 2**76  */
ce426f
-/**/ zero           = {{0x00000000, 0x00000000} }, /*  0     */
ce426f
-/**/ one            = {{0x3ff00000, 0x00000000} }, /*  1     */
ce426f
-/**/ mone           = {{0xbff00000, 0x00000000} }, /* -1     */
ce426f
-/**/ two            = {{0x40000000, 0x00000000} }, /*  2     */
ce426f
-/**/ two5           = {{0x40400000, 0x00000000} }, /* 2**5   */
ce426f
-/**/ two10          = {{0x40900000, 0x00000000} }, /* 2**10  */
ce426f
-/**/ two18          = {{0x41100000, 0x00000000} }, /* 2**18  */
ce426f
-/**/ two19          = {{0x41200000, 0x00000000} }, /* 2**19  */
ce426f
-/**/ two23          = {{0x41600000, 0x00000000} }, /* 2**23  */
ce426f
-/**/ two52          = {{0x43300000, 0x00000000} }, /* 2**52  */
ce426f
-/**/ two57          = {{0x43800000, 0x00000000} }, /* 2**57  */
ce426f
-/**/ two71          = {{0x44600000, 0x00000000} }, /* 2**71  */
ce426f
-/**/ twom1032       = {{0x00000400, 0x00000000} }; /* 2**-1032 */
ce426f
-
ce426f
-#else
ce426f
-#ifdef LITTLE_ENDI
ce426f
-static const number
ce426f
-/**/ radix          = {{0x00000000, 0x41700000} }, /* 2**24  */
ce426f
-/**/ radixi         = {{0x00000000, 0x3e700000} }, /* 2**-24 */
ce426f
-/**/ cutter         = {{0x00000000, 0x44b00000} }, /* 2**76  */
ce426f
-/**/ zero           = {{0x00000000, 0x00000000} }, /*  0     */
ce426f
-/**/ one            = {{0x00000000, 0x3ff00000} }, /*  1     */
ce426f
-/**/ mone           = {{0x00000000, 0xbff00000} }, /* -1     */
ce426f
-/**/ two            = {{0x00000000, 0x40000000} }, /*  2     */
ce426f
-/**/ two5           = {{0x00000000, 0x40400000} }, /* 2**5   */
ce426f
-/**/ two10          = {{0x00000000, 0x40900000} }, /* 2**10  */
ce426f
-/**/ two18          = {{0x00000000, 0x41100000} }, /* 2**18  */
ce426f
-/**/ two19          = {{0x00000000, 0x41200000} }, /* 2**19  */
ce426f
-/**/ two23          = {{0x00000000, 0x41600000} }, /* 2**23  */
ce426f
-/**/ two52          = {{0x00000000, 0x43300000} }, /* 2**52  */
ce426f
-/**/ two57          = {{0x00000000, 0x43800000} }, /* 2**57  */
ce426f
-/**/ two71          = {{0x00000000, 0x44600000} }, /* 2**71  */
ce426f
-/**/ twom1032       = {{0x00000000, 0x00000400} }; /* 2**-1032 */
ce426f
-
ce426f
-#endif
ce426f
-#endif
ce426f
-
ce426f
-#define  RADIX     radix.d
ce426f
-#define  RADIXI    radixi.d
ce426f
-#define  CUTTER    cutter.d
ce426f
-#define  ZERO      zero.d
ce426f
-#define  ONE       one.d
ce426f
-#define  MONE      mone.d
ce426f
-#define  TWO       two.d
ce426f
-#define  TWO5      two5.d
ce426f
-#define  TWO10     two10.d
ce426f
-#define  TWO18     two18.d
ce426f
-#define  TWO19     two19.d
ce426f
-#define  TWO23     two23.d
ce426f
-#define  TWO52     two52.d
ce426f
-#define  TWO57     two57.d
ce426f
-#define  TWO71     two71.d
ce426f
-#define  TWOM1032  twom1032.d
ce426f
-
ce426f
-
ce426f
-#endif
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h
ce426f
@@ -177,6 +177,3 @@ __atan_twonm1[33] = {
ce426f
 
ce426f
 #endif
ce426f
 #endif
ce426f
-
ce426f
-#define  ONE       __atan_one.d
ce426f
-#define  TWO       __atan_two.d
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan2.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c
ce426f
@@ -49,18 +49,16 @@ void
ce426f
 SECTION
ce426f
 __mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) {
ce426f
 
ce426f
-  static const double ZERO = 0.0, ONE = 1.0;
ce426f
-
ce426f
   mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
 		    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
ce426f
 		    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
ce426f
   mp_no mpt1,mpt2,mpt3;
ce426f
 
ce426f
 
ce426f
-  if (X[0] <= ZERO) {
ce426f
-    mpone.e = 1;                 mpone.d[0] = mpone.d[1] = ONE;
ce426f
+  if (X[0] <= 0) {
ce426f
+    mpone.e = 1;                 mpone.d[0] = mpone.d[1] = 1;
ce426f
     __dvd(x,y,&mpt1,p);          __mul(&mpt1,&mpt1,&mpt2,p);
ce426f
-    if (mpt1.d[0] != ZERO)       mpt1.d[0] = ONE;
ce426f
+    if (mpt1.d[0] != 0)       mpt1.d[0] = 1;
ce426f
     __add(&mpt2,&mpone,&mpt3,p); __mpsqrt(&mpt3,&mpt2,p);
ce426f
     __add(&mpt1,&mpt2,&mpt3,p);  mpt3.d[0]=Y[0];
ce426f
     __mpatan(&mpt3,&mpt1,p);     __add(&mpt1,&mpt1,z,p);
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h
ce426f
@@ -159,11 +159,4 @@ extern const number __mpexp_half attribu
ce426f
 #endif
ce426f
 #endif
ce426f
 
ce426f
-#define  RADIX     __mpexp_radix.d
ce426f
-#define  RADIXI    __mpexp_radixi.d
ce426f
-#define  ZERO      __mpexp_zero.d
ce426f
-#define  ONE       __mpexp_one.d
ce426f
-#define  TWO       __mpexp_two.d
ce426f
-#define  HALF      __mpexp_half.d
ce426f
-
ce426f
 #endif
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h
ce426f
@@ -51,7 +51,4 @@ extern const int __mpsqrt_mp[33] attribu
ce426f
 			     4,4,4,4,4,4,4,4,4};
ce426f
 #endif
ce426f
 
ce426f
-#define  ONE       __mpsqrt_one.d
ce426f
-#define  HALFRAD   __mpsqrt_halfrad.d
ce426f
-
ce426f
 #endif
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mptan.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c
ce426f
@@ -47,8 +47,6 @@ void
ce426f
 SECTION
ce426f
 __mptan(double x, mp_no *mpy, int p) {
ce426f
 
ce426f
-  static const double MONE = -1.0;
ce426f
-
ce426f
   int n;
ce426f
   mp_no mpw, mpc, mps;
ce426f
 
ce426f
@@ -56,7 +54,7 @@ __mptan(double x, mp_no *mpy, int p) {
ce426f
   __c32(&mpw, &mpc, &mps, p);              /* computing sin(x) and cos(x) */
ce426f
   if (n)                     /* second or fourth quarter of unit circle */
ce426f
   { __dvd(&mpc,&mps,mpy,p);
ce426f
-    mpy->d[0] *= MONE;
ce426f
+    mpy->d[0] *= -1;
ce426f
   }                          /* tan is negative in this area */
ce426f
   else  __dvd(&mps,&mpc,mpy,p);
ce426f
 
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/ulog.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h
ce426f
@@ -181,10 +181,6 @@
ce426f
 #endif
ce426f
 #endif
ce426f
 
ce426f
-#define  ZERO      zero.d
ce426f
-#define  ONE       one.d
ce426f
-#define  HALF      half.d
ce426f
-#define  MHALF     mhalf.d
ce426f
 #define  SQRT_2    sqrt_2.d
ce426f
 #define  DEL_U     delu.d
ce426f
 #define  DEL_V     delv.d
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/utan.h
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h
ce426f
@@ -270,10 +270,4 @@
ce426f
 #endif
ce426f
 #endif
ce426f
 
ce426f
-
ce426f
-#define  ZERO      zero.d
ce426f
-#define  ONE       one.d
ce426f
-#define  MONE      mone.d
ce426f
-#define  TWO8      two8.d
ce426f
-
ce426f
 #endif
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h
ce426f
===================================================================
ce426f
--- /dev/null
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h
ce426f
@@ -0,0 +1,47 @@
ce426f
+/* Overridable constants and operations.
ce426f
+   Copyright (C) 2013 Free Software Foundation, Inc.
ce426f
+
ce426f
+   This program is free software; you can redistribute it and/or modify
ce426f
+   it under the terms of the GNU Lesser General Public License as published by
ce426f
+   the Free Software Foundation; either version 2.1 of the License, or
ce426f
+   (at your option) any later version.
ce426f
+
ce426f
+   This program is distributed in the hope that it will be useful,
ce426f
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
ce426f
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
ce426f
+   GNU Lesser General Public License for more details.
ce426f
+
ce426f
+   You should have received a copy of the GNU Lesser General Public License
ce426f
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
ce426f
+
ce426f
+#include <stdint.h>
ce426f
+
ce426f
+typedef long mantissa_t;
ce426f
+typedef int64_t mantissa_store_t;
ce426f
+
ce426f
+#define TWOPOW(i) (1L << i)
ce426f
+
ce426f
+#define RADIX_EXP 24
ce426f
+#define  RADIX TWOPOW (RADIX_EXP)		/* 2^24    */
ce426f
+
ce426f
+/* Divide D by RADIX and put the remainder in R.  D must be a non-negative
ce426f
+   integral value.  */
ce426f
+#define DIV_RADIX(d, r) \
ce426f
+  ({									      \
ce426f
+    r = d & (RADIX - 1);						      \
ce426f
+    d >>= RADIX_EXP;							      \
ce426f
+  })
ce426f
+
ce426f
+/* Put the integer component of a double X in R and retain the fraction in
ce426f
+   X.  This is used in extracting mantissa digits for MP_NO by using the
ce426f
+   integer portion of the current value of the number as the current mantissa
ce426f
+   digit and then scaling by RADIX to get the next mantissa digit in the same
ce426f
+   manner.  */
ce426f
+#define INTEGER_OF(x, i) \
ce426f
+  ({									      \
ce426f
+    i = (mantissa_t) x;							      \
ce426f
+    x -= i;								      \
ce426f
+  })
ce426f
+
ce426f
+/* Align IN down to F.  The code assumes that F is a power of two.  */
ce426f
+#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
ce426f
Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h
ce426f
===================================================================
ce426f
--- /dev/null
ce426f
+++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h
ce426f
@@ -0,0 +1,56 @@
ce426f
+/* Overridable constants and operations.
ce426f
+   Copyright (C) 2013-2017 Free Software Foundation, Inc.
ce426f
+
ce426f
+   This program is free software; you can redistribute it and/or modify
ce426f
+   it under the terms of the GNU Lesser General Public License as published by
ce426f
+   the Free Software Foundation; either version 2.1 of the License, or
ce426f
+   (at your option) any later version.
ce426f
+
ce426f
+   This program is distributed in the hope that it will be useful,
ce426f
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
ce426f
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
ce426f
+   GNU Lesser General Public License for more details.
ce426f
+
ce426f
+   You should have received a copy of the GNU Lesser General Public License
ce426f
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
ce426f
+
ce426f
+typedef double mantissa_t;
ce426f
+typedef double mantissa_store_t;
ce426f
+
ce426f
+#define TWOPOW(i) (0x1.0p##i)
ce426f
+
ce426f
+#define RADIX TWOPOW (24)		/* 2^24    */
ce426f
+#define CUTTER TWOPOW (76)		/* 2^76    */
ce426f
+#define RADIXI 0x1.0p-24		/* 2^-24 */
ce426f
+#define TWO52 TWOPOW (52)		/* 2^52 */
ce426f
+
ce426f
+/* Divide D by RADIX and put the remainder in R.  */
ce426f
+#define DIV_RADIX(d,r) \
ce426f
+  ({									      \
ce426f
+    double u = ((d) + CUTTER) - CUTTER;					      \
ce426f
+    if (u > (d))							      \
ce426f
+      u -= RADIX;							      \
ce426f
+    r = (d) - u;							      \
ce426f
+    (d) = u * RADIXI;							      \
ce426f
+  })
ce426f
+
ce426f
+/* Put the integer component of a double X in R and retain the fraction in
ce426f
+   X.  */
ce426f
+#define INTEGER_OF(x, r) \
ce426f
+  ({									      \
ce426f
+    double u = ((x) + TWO52) - TWO52;					      \
ce426f
+    if (u > (x))							      \
ce426f
+      u -= 1;								      \
ce426f
+    (r) = u;								      \
ce426f
+    (x) -= u;								      \
ce426f
+  })
ce426f
+
ce426f
+/* Align IN down to a multiple of F, where F is a power of two.  */
ce426f
+#define ALIGN_DOWN_TO(in, f) \
ce426f
+  ({									      \
ce426f
+    double factor = f * TWO52;						      \
ce426f
+    double u = (in + factor) - factor;					      \
ce426f
+    if (u > in)								      \
ce426f
+      u -= f;								      \
ce426f
+    u;									      \
ce426f
+  })
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_atan2.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c
ce426f
@@ -98,15 +98,15 @@ __ieee754_atan2(double y,double x) {
ce426f
   /* y=+-0 */
ce426f
   if      (uy==0x00000000) {
ce426f
     if    (dy==0x00000000) {
ce426f
-      if  ((ux&0x80000000)==0x00000000)  return ZERO;
ce426f
+      if  ((ux&0x80000000)==0x00000000)  return 0;
ce426f
       else                               return opi.d; } }
ce426f
   else if (uy==0x80000000) {
ce426f
     if    (dy==0x00000000) {
ce426f
-      if  ((ux&0x80000000)==0x00000000)  return MZERO;
ce426f
+      if  ((ux&0x80000000)==0x00000000)  return -0.0;
ce426f
       else                               return mopi.d;} }
ce426f
 
ce426f
   /* x=+-0 */
ce426f
-  if (x==ZERO) {
ce426f
+  if (x==0) {
ce426f
     if ((uy&0x80000000)==0x00000000)     return hpi.d;
ce426f
     else                                 return mhpi.d; }
ce426f
 
ce426f
@@ -118,8 +118,8 @@ __ieee754_atan2(double y,double x) {
ce426f
       else if (uy==0xfff00000) {
ce426f
 	if    (dy==0x00000000)  return mqpi.d; }
ce426f
       else {
ce426f
-	if    ((uy&0x80000000)==0x00000000)  return ZERO;
ce426f
-	else                                 return MZERO; }
ce426f
+	if    ((uy&0x80000000)==0x00000000)  return 0;
ce426f
+	else                                 return -0.0; }
ce426f
     }
ce426f
   }
ce426f
   else if     (ux==0xfff00000) {
ce426f
@@ -141,14 +141,14 @@ __ieee754_atan2(double y,double x) {
ce426f
     if    (dy==0x00000000)  return mhpi.d; }
ce426f
 
ce426f
   /* either x/y or y/x is very close to zero */
ce426f
-  ax = (x
ce426f
+  ax = (x<0) ? -x : x;    ay = (y<0) ? -y : y;
ce426f
   de = (uy & 0x7ff00000) - (ux & 0x7ff00000);
ce426f
-  if      (de>=ep)  { return ((y>ZERO) ? hpi.d : mhpi.d); }
ce426f
+  if      (de>=ep)  { return ((y>0) ? hpi.d : mhpi.d); }
ce426f
   else if (de<=em)  {
ce426f
-    if    (x>ZERO)  {
ce426f
+    if    (x>0)  {
ce426f
       if  ((z=ay/ax)
ce426f
       else                      return signArctan2(y,z); }
ce426f
-    else            { return ((y>ZERO) ? opi.d : mopi.d); } }
ce426f
+    else            { return ((y>0) ? opi.d : mopi.d); } }
ce426f
 
ce426f
   /* if either x or y is extremely close to zero, scale abs(x), abs(y). */
ce426f
   if (ax
ce426f
@@ -170,7 +170,7 @@ __ieee754_atan2(double y,double x) {
ce426f
     EMULV(ay,u,v,vv,t1,t2,t3,t4,t5)
ce426f
     du=((ax-v)-vv)/ay; }
ce426f
 
ce426f
-  if (x>ZERO) {
ce426f
+  if (x>0) {
ce426f
 
ce426f
     /* (i)   x>0, abs(y)< abs(x):  atan(ay/ax) */
ce426f
     if (ay
ce426f
@@ -180,7 +180,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 
ce426f
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
ce426f
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -212,7 +212,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 	EADD(t1,du,v,vv)
ce426f
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
ce426f
 	   v*(hij[i][14].d+v* hij[i][15].d))));
ce426f
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -237,7 +237,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 
ce426f
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
ce426f
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -265,7 +265,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 	EADD(t1,du,v,vv)
ce426f
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
ce426f
 	   v*(hij[i][14].d+v* hij[i][15].d))));
ce426f
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -293,7 +293,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 
ce426f
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
ce426f
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -321,7 +321,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 	EADD(t1,du,v,vv)
ce426f
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
ce426f
 	   v*(hij[i][14].d+v* hij[i][15].d))));
ce426f
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -347,7 +347,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 
ce426f
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
ce426f
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -375,7 +375,7 @@ __ieee754_atan2(double y,double x) {
ce426f
 	EADD(t1,du,v,vv)
ce426f
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
ce426f
 	   v*(hij[i][14].d+v* hij[i][15].d))));
ce426f
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_log.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_log.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_log.c
ce426f
@@ -78,9 +78,9 @@ __ieee754_log(double x) {
ce426f
   n=0;
ce426f
   if (__builtin_expect(ux < 0x00100000, 0)) {
ce426f
     if (__builtin_expect(((ux & 0x7fffffff) | dx) == 0, 0))
ce426f
-      return MHALF/ZERO; /* return -INF */
ce426f
+      return MHALF/0; /* return -INF */
ce426f
     if (__builtin_expect(ux < 0, 0))
ce426f
-      return (x-x)/ZERO;                         /* return NaN  */
ce426f
+      return (x-x)/0;                         /* return NaN  */
ce426f
     n -= 54;    x *= two54.d;                              /* scale x     */
ce426f
     num.d = x;
ce426f
   }
ce426f
@@ -89,7 +89,7 @@ __ieee754_log(double x) {
ce426f
 
ce426f
   /* Regular values of x */
ce426f
 
ce426f
-  w = x-ONE;
ce426f
+  w = x-1;
ce426f
   if (__builtin_expect(ABS(w) > U03, 1)) { goto case_03; }
ce426f
 
ce426f
 
ce426f
@@ -113,25 +113,25 @@ __ieee754_log(double x) {
ce426f
 	    w*(d17.d+w*(d18.d+w*(d19.d+w*d20.d))))))));
ce426f
   EMULV(w,a,s2,ss2,t1,t2,t3,t4,t5)
ce426f
   ADD2(d10.d,dd10.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d9.d,dd9.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d8.d,dd8.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d7.d,dd7.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d6.d,dd6.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d5.d,dd5.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d4.d,dd4.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d3.d,dd3.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
   ADD2(d2.d,dd2.d,s2,ss2,s3,ss3,t1,t2)
ce426f
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
-  MUL2(w,ZERO,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
-  ADD2(w,ZERO,    s3,ss3, b, bb,t1,t2)
ce426f
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(w,0,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  ADD2(w,0,    s3,ss3, b, bb,t1,t2)
ce426f
 
ce426f
   /* End stage II, case abs(x-1) < 0.03 */
ce426f
   if ((y=b+(bb+b*E4)) == b+(bb-b*E4))  return y;
ce426f
@@ -155,7 +155,7 @@ __ieee754_log(double x) {
ce426f
   j = (num.i[HIGH_HALF] & 0x000fffff) >> 4;
ce426f
 
ce426f
   /* Compute w=(u-ui*vj)/(ui*vj) */
ce426f
-  p0=(ONE+(i-75)*DEL_U)*(ONE+(j-180)*DEL_V);
ce426f
+  p0=(1+(i-75)*DEL_U)*(1+(j-180)*DEL_V);
ce426f
   q=u-p0;   r0=Iu[i].d*Iv[j].d;   w=q*r0;
ce426f
 
ce426f
   /* Evaluate polynomial I */
ce426f
@@ -178,11 +178,11 @@ __ieee754_log(double x) {
ce426f
 
ce426f
   /* Improve the accuracy of r0 */
ce426f
   EMULV(p0,r0,sa,sb,t1,t2,t3,t4,t5)
ce426f
-  t=r0*((ONE-sa)-sb);
ce426f
+  t=r0*((1-sa)-sb);
ce426f
   EADD(r0,t,ra,rb)
ce426f
 
ce426f
   /* Compute w */
ce426f
-  MUL2(q,ZERO,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+  MUL2(q,0,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 
ce426f
   EADD(A,B0,a0,aa0)
ce426f
 
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_atan.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c
ce426f
@@ -82,7 +82,7 @@ double atan(double x) {
ce426f
     return x+x;
ce426f
 
ce426f
   /* Regular values of x, including denormals +-0 and +-INF */
ce426f
-  u = (x
ce426f
+  u = (x<0) ? -x : x;
ce426f
   if (u
ce426f
     if (u
ce426f
       if (u
ce426f
@@ -93,7 +93,7 @@ double atan(double x) {
ce426f
 
ce426f
 	EMULV(x,x,v,vv,t1,t2,t3,t4,t5)                       /* v+vv=x^2 */
ce426f
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
ce426f
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -101,8 +101,8 @@ double atan(double x) {
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
-	MUL2(x,ZERO,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
-	ADD2(x,ZERO,s2,ss2,s1,ss1,t1,t2)
ce426f
+	MUL2(x,0,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+	ADD2(x,0,s2,ss2,s1,ss1,t1,t2)
ce426f
 	if ((y=s1+(ss1-U5*s1)) == s1+(ss1+U5*s1))  return y;
ce426f
 
ce426f
 	return atanMp(x,pr);
ce426f
@@ -124,14 +124,14 @@ double atan(double x) {
ce426f
       z=u-hij[i][0].d;
ce426f
       s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+
ce426f
 	 z*(hij[i][14].d+z* hij[i][15].d))));
ce426f
-      ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
ce426f
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+      ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
ce426f
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
       ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
ce426f
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
       ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
ce426f
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
       ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
ce426f
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
       ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
ce426f
       if ((y=s2+(ss2-U6*s2)) == s2+(ss2+U6*s2))  return __signArctan(x,y);
ce426f
 
ce426f
@@ -140,9 +140,9 @@ double atan(double x) {
ce426f
   }
ce426f
   else {
ce426f
     if (u
ce426f
-      w=ONE/u;
ce426f
+      w=1/u;
ce426f
       EMULV(w,u,t1,t2,t3,t4,t5,t6,t7)
ce426f
-      ww=w*((ONE-t1)-t2);
ce426f
+      ww=w*((1-t1)-t2);
ce426f
       i=(TWO52+TWO8*w)-TWO52;  i-=16;
ce426f
       z=(w-cij[i][0].d)+ww;
ce426f
       yy=HPI1-z*(cij[i][2].d+z*(cij[i][3].d+z*(cij[i][4].d+
ce426f
@@ -152,12 +152,12 @@ double atan(double x) {
ce426f
       else        u3=U32;  /* w >= 1/2 */
ce426f
       if ((y=t1+(yy-u3)) == t1+(yy+u3))  return __signArctan(x,y);
ce426f
 
ce426f
-      DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
ce426f
+      DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
ce426f
       t1=w-hij[i][0].d;
ce426f
       EADD(t1,ww,z,zz)
ce426f
       s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+
ce426f
 	 z*(hij[i][14].d+z* hij[i][15].d))));
ce426f
-      ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+      ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
ce426f
       MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
       ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
ce426f
       MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
@@ -173,18 +173,18 @@ double atan(double x) {
ce426f
     }
ce426f
     else {
ce426f
       if (u
ce426f
-	w=ONE/u;   v=w*w;
ce426f
+	w=1/u;   v=w*w;
ce426f
 	EMULV(w,u,t1,t2,t3,t4,t5,t6,t7)
ce426f
 	yy=w*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
ce426f
-	ww=w*((ONE-t1)-t2);
ce426f
+	ww=w*((1-t1)-t2);
ce426f
 	ESUB(HPI,w,t3,cor)
ce426f
 	yy=((HPI1+cor)-ww)-yy;
ce426f
 	if ((y=t3+(yy-U4)) == t3+(yy+U4))  return __signArctan(x,y);
ce426f
 
ce426f
-	DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
ce426f
+	DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
ce426f
 	MUL2(w,ww,w,ww,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
ce426f
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
ce426f
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
ce426f
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_tan.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_tan.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_tan.c
ce426f
@@ -84,7 +84,7 @@ tan(double x) {
ce426f
     goto ret;
ce426f
   }
ce426f
 
ce426f
-  w=(x
ce426f
+  w=(x<0) ? -x : x;
ce426f
 
ce426f
   /* (I) The case abs(x) <= 1.259e-8 */
ce426f
   if (w<=g1.d) { retval = x; goto ret; }
ce426f
@@ -125,11 +125,11 @@ tan(double x) {
ce426f
 
ce426f
     /* First stage */
ce426f
     i = ((int) (mfftnhf.d+TWO8*w));
ce426f
-    z = w-xfg[i][0].d;  z2 = z*z;   s = (x
ce426f
+    z = w-xfg[i][0].d;  z2 = z*z;   s = (x<0) ? -1 : 1;
ce426f
     pz = z+z*z2*(e0.d+z2*e1.d);
ce426f
     fi = xfg[i][1].d;   gi = xfg[i][2].d;   t2 = pz*(gi+fi)/(gi-pz);
ce426f
     if ((y=fi+(t2-fi*u3.d))==fi+(t2+fi*u3.d)) { retval = (s*y); goto ret; }
ce426f
-    t3 = (t2
ce426f
+    t3 = (t2<0) ? -t2 : t2;
ce426f
     t4 = fi*ua3.d+t3*ub3.d;
ce426f
     if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (s*y); goto ret; }
ce426f
 
ce426f
@@ -165,8 +165,8 @@ tan(double x) {
ce426f
     da = xn*mp3.d;
ce426f
     a=t1-da;
ce426f
     da = (t1-a)-da;
ce426f
-    if (a
ce426f
-    else         {ya= a;  yya= da;  sy= ONE;}
ce426f
+    if (a<0)  {ya=-a;  yya=-da;  sy=-1;}
ce426f
+    else         {ya= a;  yya= da;  sy= 1;}
ce426f
 
ce426f
     /* (IV),(V) The case 0.787 < abs(x) <= 25,    abs(y) <= 1e-7 */
ce426f
     if (ya<=gy1.d) { retval = tanMp(x); goto ret; }
ce426f
@@ -240,14 +240,14 @@ tan(double x) {
ce426f
       /* -cot */
ce426f
       t2 = pz*(fi+gi)/(fi+pz);
ce426f
       if ((y=gi-(t2-gi*u10.d))==gi-(t2+gi*u10.d)) { retval = (-sy*y); goto ret; }
ce426f
-      t3 = (t2
ce426f
+      t3 = (t2<0) ? -t2 : t2;
ce426f
       t4 = gi*ua10.d+t3*ub10.d;
ce426f
       if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } }
ce426f
     else   {
ce426f
       /* tan */
ce426f
       t2 = pz*(gi+fi)/(gi-pz);
ce426f
       if ((y=fi+(t2-fi*u9.d))==fi+(t2+fi*u9.d)) { retval = (sy*y); goto ret; }
ce426f
-      t3 = (t2
ce426f
+      t3 = (t2<0) ? -t2 : t2;
ce426f
       t4 = fi*ua9.d+t3*ub9.d;
ce426f
       if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } }
ce426f
 
ce426f
@@ -295,8 +295,8 @@ tan(double x) {
ce426f
     a = t - t1;
ce426f
     da = ((t-a)-t1)+da;
ce426f
     EADD(a,da,t1,t2)   a=t1;  da=t2;
ce426f
-    if (a
ce426f
-    else         {ya= a;  yya= da;  sy= ONE;}
ce426f
+    if (a<0)  {ya=-a;  yya=-da;  sy=-1;}
ce426f
+    else         {ya= a;  yya= da;  sy= 1;}
ce426f
 
ce426f
     /* (+++) The case 25 < abs(x) <= 1e8,    abs(y) <= 1e-7 */
ce426f
     if (ya<=gy1.d) { retval = tanMp(x); goto ret; }
ce426f
@@ -355,14 +355,14 @@ tan(double x) {
ce426f
       /* -cot */
ce426f
       t2 = pz*(fi+gi)/(fi+pz);
ce426f
       if ((y=gi-(t2-gi*u18.d))==gi-(t2+gi*u18.d)) { retval = (-sy*y); goto ret; }
ce426f
-      t3 = (t2
ce426f
+      t3 = (t2<0) ? -t2 : t2;
ce426f
       t4 = gi*ua18.d+t3*ub18.d;
ce426f
       if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } }
ce426f
     else   {
ce426f
       /* tan */
ce426f
       t2 = pz*(gi+fi)/(gi-pz);
ce426f
       if ((y=fi+(t2-fi*u17.d))==fi+(t2+fi*u17.d)) { retval = (sy*y); goto ret; }
ce426f
-      t3 = (t2
ce426f
+      t3 = (t2<0) ? -t2 : t2;
ce426f
       t4 = fi*ua17.d+t3*ub17.d;
ce426f
       if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } }
ce426f
 
ce426f
@@ -398,8 +398,8 @@ tan(double x) {
ce426f
   /* Range reduction by algorithm iii */
ce426f
   n = (__branred(x,&a,&da)) & 0x00000001;
ce426f
   EADD(a,da,t1,t2)   a=t1;  da=t2;
ce426f
-  if (a
ce426f
-  else         {ya= a;  yya= da;  sy= ONE;}
ce426f
+  if (a<0)  {ya=-a;  yya=-da;  sy=-1;}
ce426f
+  else         {ya= a;  yya= da;  sy= 1;}
ce426f
 
ce426f
   /* (+++) The case 1e8 < abs(x) < 2**1024,    abs(y) <= 1e-7 */
ce426f
   if (ya<=gy1.d) { retval = tanMp(x); goto ret; }
ce426f
@@ -463,14 +463,14 @@ tan(double x) {
ce426f
     /* -cot */
ce426f
     t2 = pz*(fi+gi)/(fi+pz);
ce426f
     if ((y=gi-(t2-gi*u26.d))==gi-(t2+gi*u26.d)) { retval = (-sy*y); goto ret; }
ce426f
-    t3 = (t2
ce426f
+    t3 = (t2<0) ? -t2 : t2;
ce426f
     t4 = gi*ua26.d+t3*ub26.d;
ce426f
     if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } }
ce426f
   else   {
ce426f
     /* tan */
ce426f
     t2 = pz*(gi+fi)/(gi-pz);
ce426f
     if ((y=fi+(t2-fi*u25.d))==fi+(t2+fi*u25.d)) { retval = (sy*y); goto ret; }
ce426f
-    t3 = (t2
ce426f
+    t3 = (t2<0) ? -t2 : t2;
ce426f
     t4 = fi*ua25.d+t3*ub25.d;
ce426f
     if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } }
ce426f
 
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.c
ce426f
@@ -69,8 +69,8 @@ __mpatan(mp_no *x, mp_no *y, int p) {
ce426f
 	{if (dx>__atan_xm[m].d) break;}
ce426f
     }
ce426f
     mpone.e    = mptwo.e    = mptwoim1.e = 1;
ce426f
-    mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = ONE;
ce426f
-    mptwo.d[1] = TWO;
ce426f
+    mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = 1;
ce426f
+    mptwo.d[1] = 2;
ce426f
 
ce426f
 				 /* Reduce x m times */
ce426f
     __mul(x,x,&mpsm,p);
ce426f
@@ -92,7 +92,7 @@ __mpatan(mp_no *x, mp_no *y, int p) {
ce426f
     n=__atan_np[p];    mptwoim1.d[1] = __atan_twonm1[p].d;
ce426f
     __dvd(&mpsm,&mptwoim1,&mpt,p);
ce426f
     for (i=n-1; i>1; i--) {
ce426f
-      mptwoim1.d[1] -= TWO;
ce426f
+      mptwoim1.d[1] -= 2;
ce426f
       __dvd(&mpsm,&mptwoim1,&mpt1,p);
ce426f
       __mul(&mpsm,&mpt,&mpt2,p);
ce426f
       __sub(&mpt1,&mpt2,&mpt,p);
ce426f
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.c
ce426f
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c
ce426f
@@ -62,8 +62,8 @@ __mpsqrt(mp_no *x, mp_no *y, int p) {
ce426f
   mp_no mpxn,mpz,mpu,mpt1,mpt2;
ce426f
 
ce426f
   /* Prepare multi-precision 1/2 and 3/2 */
ce426f
-  mphalf.e  =0;  mphalf.d[0]  =ONE;  mphalf.d[1]  =HALFRAD;
ce426f
-  mp3halfs.e=1;  mp3halfs.d[0]=ONE;  mp3halfs.d[1]=ONE;  mp3halfs.d[2]=HALFRAD;
ce426f
+  mphalf.e  =0;  mphalf.d[0]  =1;  mphalf.d[1]  =HALFRAD;
ce426f
+  mp3halfs.e=1;  mp3halfs.d[0]=1;  mp3halfs.d[1]=1;  mp3halfs.d[2]=HALFRAD;
ce426f
 
ce426f
   ey=EX/2;     __cpy(x,&mpxn,p);    mpxn.e -= (ey+ey);
ce426f
   __mp_dbl(&mpxn,&dx,p);   dy=fastiroot(dx);    __dbl_mp(dy,&mpu,p);