From 535104ab6a2d6f22098f79e7107963e3fc3448a3 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 4 Sep 2013 15:51:05 +0000 Subject: [PATCH] math: cbrt cleanup and long double fix * use float_t and double_t * cleanup subnormal handling * bithacks according to the new convention (ldshape for long double and explicit unions for float and double) --- src/math/cbrt.c | 36 +++++++++++++-------------- src/math/cbrtf.c | 31 +++++++++++------------ src/math/cbrtl.c | 64 +++++++++++++++++++++--------------------------- 3 files changed, 59 insertions(+), 72 deletions(-) diff --git a/src/math/cbrt.c b/src/math/cbrt.c index f4253428..7599d3e3 100644 --- a/src/math/cbrt.c +++ b/src/math/cbrt.c @@ -15,7 +15,8 @@ * Return cube root of x */ -#include "libm.h" +#include +#include static const uint32_t B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ @@ -31,15 +32,10 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ double cbrt(double x) { - int32_t hx; - union dshape u; - double r,s,t=0.0,w; - uint32_t sign; - uint32_t high,low; + union {double f; uint64_t i;} u = {x}; + double_t r,s,t,w; + uint32_t hx = u.i>>32 & 0x7fffffff; - EXTRACT_WORDS(hx, low, x); - sign = hx & 0x80000000; - hx ^= sign; if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */ return x+x; @@ -59,14 +55,16 @@ double cbrt(double x) * division rounds towards minus infinity; this is also efficient. */ if (hx < 0x00100000) { /* zero or subnormal? */ - if ((hx|low) == 0) + u.f = x*0x1p54; + hx = u.i>>32 & 0x7fffffff; + if (hx == 0) return x; /* cbrt(0) is itself */ - SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */ - t *= x; - GET_HIGH_WORD(high, t); - INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0); + hx = hx/3 + B2; } else - INSERT_WORDS(t, sign|(hx/3+B1), 0); + hx = hx/3 + B1; + u.i &= 1ULL<<63; + u.i |= (uint64_t)hx << 32; + t = u.f; /* * New cbrt to 23 bits: @@ -76,7 +74,7 @@ double cbrt(double x) * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this * gives us bounds for r = t**3/x. * - * Try to optimize for parallel evaluation as in k_tanf.c. + * Try to optimize for parallel evaluation as in __tanf.c. */ r = (t*t)*(t/x); t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4)); @@ -91,9 +89,9 @@ double cbrt(double x) * 0.667; the error in the rounded t can be up to about 3 23-bit ulps * before the final error is larger than 0.667 ulps. */ - u.value = t; - u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL; - t = u.value; + u.f = t; + u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL; + t = u.f; /* one step Newton iteration to 53 bits with error < 0.667 ulps */ s = t*t; /* t*t is exact */ diff --git a/src/math/cbrtf.c b/src/math/cbrtf.c index 4a984b10..89c2c865 100644 --- a/src/math/cbrtf.c +++ b/src/math/cbrtf.c @@ -17,7 +17,8 @@ * Return cube root of x */ -#include "libm.h" +#include +#include static const unsigned B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */ @@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ float cbrtf(float x) { - double r,T; - float t; - int32_t hx; - uint32_t sign; - uint32_t high; + double_t r,T; + union {float f; uint32_t i;} u = {x}; + uint32_t hx = u.i & 0x7fffffff; - GET_FLOAT_WORD(hx, x); - sign = hx & 0x80000000; - hx ^= sign; if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */ return x + x; @@ -41,28 +37,29 @@ float cbrtf(float x) if (hx < 0x00800000) { /* zero or subnormal? */ if (hx == 0) return x; /* cbrt(+-0) is itself */ - SET_FLOAT_WORD(t, 0x4b800000); /* set t = 2**24 */ - t *= x; - GET_FLOAT_WORD(high, t); - SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2)); + u.f = x*0x1p24f; + hx = u.i & 0x7fffffff; + hx = hx/3 + B2; } else - SET_FLOAT_WORD(t, sign|(hx/3+B1)); + hx = hx/3 + B1; + u.i &= 0x80000000; + u.i |= hx; /* * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In * double precision so that its terms can be arranged for efficiency * without causing overflow or underflow. */ - T = t; + T = u.f; r = T*T*T; - T = T*((double)x+x+r)/(x+r+r); + T = T*((double_t)x+x+r)/(x+r+r); /* * Second step Newton iteration to 47 bits. In double precision for * efficiency and accuracy. */ r = T*T*T; - T = T*((double)x+x+r)/(x+r+r); + T = T*((double_t)x+x+r)/(x+r+r); /* rounding to 24 bits is perfect in round-to-nearest mode */ return T; diff --git a/src/math/cbrtl.c b/src/math/cbrtl.c index 0af65ef5..ceff9136 100644 --- a/src/math/cbrtl.c +++ b/src/math/cbrtl.c @@ -23,58 +23,50 @@ long double cbrtl(long double x) return cbrt(x); } #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 - -#define BIAS (LDBL_MAX_EXP - 1) static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ long double cbrtl(long double x) { - union IEEEl2bits u, v; + union ldshape u = {x}, v; + union {float f; uint32_t i;} uft; long double r, s, t, w; - double dr, dt, dx; - float ft, fx; - uint32_t hx; - uint16_t expsign; - int k; - - u.e = x; - expsign = u.xbits.expsign; - k = expsign & 0x7fff; + double_t dr, dt, dx; + float_t ft; + int e = u.i.se & 0x7fff; + int sign = u.i.se & 0x8000; /* * If x = +-Inf, then cbrt(x) = +-Inf. * If x = NaN, then cbrt(x) = NaN. */ - if (k == BIAS + LDBL_MAX_EXP) + if (e == 0x7fff) return x + x; - - if (k == 0) { + if (e == 0) { + /* Adjust subnormal numbers. */ + u.f *= 0x1p120; + e = u.i.se & 0x7fff; /* If x = +-0, then cbrt(x) = +-0. */ - if ((u.bits.manh | u.bits.manl) == 0) + if (e == 0) return x; - /* Adjust subnormal numbers. */ - u.e *= 0x1.0p514; - k = u.bits.exp; - k -= BIAS + 514; - } else - k -= BIAS; - u.xbits.expsign = BIAS; - v.e = 1; - - x = u.e; - switch (k % 3) { + e -= 120; + } + e -= 0x3fff; + u.i.se = 0x3fff; + x = u.f; + switch (e % 3) { case 1: case -2: - x = 2*x; - k--; + x *= 2; + e--; break; case 2: case -1: - x = 4*x; - k -= 2; + x *= 4; + e -= 2; break; } - v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3); + v.f = 1.0; + v.i.se = sign | (0x3fff + e/3); /* * The following is the guts of s_cbrtf, with the handling of @@ -83,9 +75,9 @@ long double cbrtl(long double x) */ /* ~5-bit estimate: */ - fx = x; - GET_FLOAT_WORD(hx, fx); - SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1)); + uft.f = x; + uft.i = (uft.i & 0x7fffffff)/3 + B1; + ft = uft.f; /* ~16-bit estimate: */ dx = x; @@ -126,7 +118,7 @@ long double cbrtl(long double x) r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ - t *= v.e; + t *= v.f; return t; } #endif -- 2.40.0