From 1377150f6e7125f2d4c359dda6402dd28a27a74f Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Tue, 2 Oct 2001 23:24:15 +0000 Subject: [PATCH] Finish ASCII->internal FP conversion and clean up single/double/extended conversion functions. For accuracy, change from 64-bit to 80-bit internal mantissa. Modify tests to match new internal format. TODO: Denormalized numbers, and write more tests! svn path=/trunk/yasm/; revision=251 --- libyasm/floatnum.c | 603 +++++++++++++++++++++++++++------- libyasm/floatnum.h | 12 +- libyasm/tests/floatnum_test.c | 8 +- src/floatnum.c | 603 +++++++++++++++++++++++++++------- src/floatnum.h | 12 +- src/tests/floatnum_test.c | 8 +- 6 files changed, 986 insertions(+), 260 deletions(-) diff --git a/libyasm/floatnum.c b/libyasm/floatnum.c index 50656aa6..b2303758 100644 --- a/libyasm/floatnum.c +++ b/libyasm/floatnum.c @@ -28,6 +28,7 @@ #include "util.h" #include +#include #ifdef STDC_HEADERS # include @@ -50,17 +51,276 @@ RCSID("$IdPath$"); +/* constants describing parameters of internal floating point format */ +#define MANT_BITS 80 +#define MANT_BYTES 10 +#define MANT_SIGDIGITS 24 +#define EXP_BIAS 0x7FFF +#define EXP_INF 0xFFFF +#define EXP_MAX 0xFFFE +#define EXP_MIN 1 +#define EXP_ZERO 0 + +/* Flag settings for flags field */ +#define FLAG_ISZERO 1<<0 + +/* Note this structure integrates the floatnum structure */ +typedef struct POT_Entry_s { + floatnum f; + int dec_exponent; +} POT_Entry; + +/* "Source" for POT_Entry. */ +typedef struct POT_Entry_Source_s { + unsigned char mantissa[MANT_BYTES]; /* little endian mantissa */ + unsigned short exponent; /* Bias 32767 exponent */ +} POT_Entry_Source; + +/* Power of ten tables used by the floating point I/O routines. + * The POT_Table? arrays are built from the POT_Table?_Source arrays at + * runtime by POT_Table_Init(). + */ + +/* This table contains the powers of ten raised to negative powers of two: + * + * entry[12-n] = 10 ** (-2 ** n) for 0 <= n <= 12. + * entry[13] = 1.0 + */ +static POT_Entry *POT_TableN = (POT_Entry *)NULL; +static POT_Entry_Source POT_TableN_Source[] = { + {{0xe3,0x2d,0xde,0x9f,0xce,0xd2,0xc8,0x04,0xdd,0xa6},0x4ad8}, /* 1e-4096 */ + {{0x25,0x49,0xe4,0x2d,0x36,0x34,0x4f,0x53,0xae,0xce},0x656b}, /* 1e-2048 */ + {{0xa6,0x87,0xbd,0xc0,0x57,0xda,0xa5,0x82,0xa6,0xa2},0x72b5}, /* 1e-1024 */ + {{0x33,0x71,0x1c,0xd2,0x23,0xdb,0x32,0xee,0x49,0x90},0x795a}, /* 1e-512 */ + {{0x91,0xfa,0x39,0x19,0x7a,0x63,0x25,0x43,0x31,0xc0},0x7cac}, /* 1e-256 */ + {{0x7d,0xac,0xa0,0xe4,0xbc,0x64,0x7c,0x46,0xd0,0xdd},0x7e55}, /* 1e-128 */ + {{0x24,0x3f,0xa5,0xe9,0x39,0xa5,0x27,0xea,0x7f,0xa8},0x7f2a}, /* 1e-64 */ + {{0xde,0x67,0xba,0x94,0x39,0x45,0xad,0x1e,0xb1,0xcf},0x7f94}, /* 1e-32 */ + {{0x2f,0x4c,0x5b,0xe1,0x4d,0xc4,0xbe,0x94,0x95,0xe6},0x7fc9}, /* 1e-16 */ + {{0xc2,0xfd,0xfc,0xce,0x61,0x84,0x11,0x77,0xcc,0xab},0x7fe4}, /* 1e-8 */ + {{0xc3,0xd3,0x2b,0x65,0x19,0xe2,0x58,0x17,0xb7,0xd1},0x7ff1}, /* 1e-4 */ + {{0x71,0x3d,0x0a,0xd7,0xa3,0x70,0x3d,0x0a,0xd7,0xa3},0x7ff8}, /* 1e-2 */ + {{0xcd,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc},0x7ffb}, /* 1e-1 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80},0x7fff}, /* 1e-0 */ +}; + +/* This table contains the powers of ten raised to positive powers of two: + * + * entry[12-n] = 10 ** (2 ** n) for 0 <= n <= 12. + * entry[13] = 1.0 + * entry[-1] = entry[0]; + * + * There is a -1 entry since it is possible for the algorithm to back up + * before the table. This -1 entry is created at runtime by duplicating the + * 0 entry. + */ +static POT_Entry *POT_TableP; +static POT_Entry_Source POT_TableP_Source[] = { + {{0x4c,0xc9,0x9a,0x97,0x20,0x8a,0x02,0x52,0x60,0xc4},0xb525}, /* 1e+4096 */ + {{0x4d,0xa7,0xe4,0x5d,0x3d,0xc5,0x5d,0x3b,0x8b,0x9e},0x9a92}, /* 1e+2048 */ + {{0x0d,0x65,0x17,0x0c,0x75,0x81,0x86,0x75,0x76,0xc9},0x8d48}, /* 1e+1024 */ + {{0x65,0xcc,0xc6,0x91,0x0e,0xa6,0xae,0xa0,0x19,0xe3},0x86a3}, /* 1e+512 */ + {{0xbc,0xdd,0x8d,0xde,0xf9,0x9d,0xfb,0xeb,0x7e,0xaa},0x8351}, /* 1e+256 */ + {{0x6f,0xc6,0xdf,0x8c,0xe9,0x80,0xc9,0x47,0xba,0x93},0x81a8}, /* 1e+128 */ + {{0xbf,0x3c,0xd5,0xa6,0xcf,0xff,0x49,0x1f,0x78,0xc2},0x80d3}, /* 1e+64 */ + {{0x20,0xf0,0x9d,0xb5,0x70,0x2b,0xa8,0xad,0xc5,0x9d},0x8069}, /* 1e+32 */ + {{0x00,0x00,0x00,0x00,0x00,0x04,0xbf,0xc9,0x1b,0x8e},0x8034}, /* 1e+16 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0xbc,0xbe},0x8019}, /* 1e+8 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x9c},0x800c}, /* 1e+4 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc8},0x8005}, /* 1e+2 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xa0},0x8002}, /* 1e+1 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80},0x7fff}, /* 1e+0 */ +}; + +static void +POT_Table_Init_Entry(POT_Entry *e, POT_Entry_Source *s, int dec_exp) +{ + /* Save decimal exponent */ + e->dec_exponent = dec_exp; + + /* Initialize mantissa */ + e->f.mantissa = BitVector_Create(MANT_BITS, FALSE); + if (!e->f.mantissa) + Fatal(FATAL_NOMEM); + BitVector_Block_Store(e->f.mantissa, s->mantissa, MANT_BYTES); + + /* Initialize exponent */ + e->f.exponent = s->exponent; + + /* Set sign to 0 (positive) */ + e->f.sign = 0; + + /* Clear flags */ + e->f.flags = 0; +} + +static void +POT_Table_Init(void) +{ + unsigned int dec_exp = 1; + int i; + + /* Allocate space for two POT tables */ + POT_TableN = malloc(14*sizeof(POT_Entry)); + if (!POT_TableN) + Fatal(FATAL_NOMEM); + POT_TableP = malloc(15*sizeof(POT_Entry)); /* note 1 extra for -1 */ + if (!POT_TableP) + Fatal(FATAL_NOMEM); + + /* Initialize entry[0..12] */ + for (i=12; i>=0; i--) { + POT_Table_Init_Entry(&POT_TableN[i], &POT_TableN_Source[i], 0-dec_exp); + POT_Table_Init_Entry(&POT_TableP[i+1], &POT_TableP_Source[i], dec_exp); + dec_exp *= 2; /* Update decimal exponent */ + } + + /* Initialize entry[13] */ + POT_Table_Init_Entry(&POT_TableN[13], &POT_TableN_Source[13], 0); + POT_Table_Init_Entry(&POT_TableP[14], &POT_TableP_Source[13], 0); + + /* Initialize entry[-1] for POT_TableP */ + POT_Table_Init_Entry(&POT_TableP[0], &POT_TableP_Source[0], 4096); + + /* Offset POT_TableP so that [0] becomes [-1] */ + POT_TableP++; +} + +static void +floatnum_normalize(floatnum *flt) +{ + int norm_amt; + + if (BitVector_is_empty(flt->mantissa)) { + flt->exponent = 0; + return; + } + + /* Look for the highest set bit, shift to make it the MSB, and adjust + * exponent. Don't let exponent go negative. */ + norm_amt = (MANT_BITS-1)-Set_Max(flt->mantissa); + if (norm_amt > flt->exponent) + norm_amt = flt->exponent; + BitVector_Move_Left(flt->mantissa, norm_amt); + flt->exponent -= norm_amt; +} + +/* acc *= op */ +static void +floatnum_mul(floatnum *acc, const floatnum *op) +{ + int exp; + unsigned int *product, *op1, *op2; + int norm_amt; + + /* Compute the new sign */ + acc->sign ^= op->sign; + + /* Check for multiply by 0 */ + if (BitVector_is_empty(acc->mantissa) || BitVector_is_empty(op->mantissa)) { + BitVector_Empty(acc->mantissa); + acc->exponent = EXP_ZERO; + return; + } + + /* Add exponents, checking for overflow/underflow. */ + exp = (((int)acc->exponent)-EXP_BIAS) + (((int)op->exponent)-EXP_BIAS); + exp += EXP_BIAS; + if (exp > EXP_MAX) { + /* Overflow; return infinity. */ + BitVector_Empty(acc->mantissa); + acc->exponent = EXP_INF; + return; + } else if (exp < EXP_MIN) { + /* Underflow; return zero. */ + BitVector_Empty(acc->mantissa); + acc->exponent = EXP_ZERO; + return; + } + + /* Add one to the final exponent, as the multiply shifts one extra time. */ + acc->exponent = exp+1; + + /* Allocate space for the multiply result */ + product = BitVector_Create((MANT_BITS+1)*2, FALSE); + if (!product) + Fatal(FATAL_NOMEM); + + /* Allocate 1-bit-longer fields to force the operands to be unsigned */ + op1 = BitVector_Create(MANT_BITS+1, FALSE); + if (!op1) + Fatal(FATAL_NOMEM); + op2 = BitVector_Create(MANT_BITS+1, FALSE); + if (!op2) + Fatal(FATAL_NOMEM); + + /* Make the operands unsigned after copying from original operands */ + BitVector_Copy(op1, acc->mantissa); + BitVector_MSB(op1, 0); + BitVector_Copy(op2, op->mantissa); + BitVector_MSB(op2, 0); + + /* Compute the product of the mantissas */ + BitVector_Multiply(product, op1, op2); + + /* Normalize the product. Note: we know the product is non-zero because + * both of the original operands were non-zero. + * + * Look for the highest set bit, shift to make it the MSB, and adjust + * exponent. Don't let exponent go negative. + */ + norm_amt = (MANT_BITS*2-1)-Set_Max(product); + if (norm_amt > acc->exponent) + norm_amt = acc->exponent; + BitVector_Move_Left(product, norm_amt); + acc->exponent -= norm_amt; + + /* Store the highest bits of the result */ + BitVector_Interval_Copy(acc->mantissa, product, 0, MANT_BITS, MANT_BITS); + + /* Free allocated variables */ + BitVector_Destroy(product); + BitVector_Destroy(op1); + BitVector_Destroy(op2); +} + floatnum * floatnum_new(char *str) { - floatnum *flt = malloc(sizeof(floatnum)); + floatnum *flt; + int dec_exponent, dec_exp_add; /* decimal (powers of 10) exponent */ + int POT_index; + unsigned int *operand[2]; + int sig_digits; + int decimal_pt; + boolean carry; + + /* Initialize POT tables if necessary */ + if (!POT_TableN) + POT_Table_Init(); + + flt = malloc(sizeof(floatnum)); if (!flt) Fatal(FATAL_NOMEM); - flt->mantissa = BitVector_Create(64, TRUE); + flt->mantissa = BitVector_Create(MANT_BITS, TRUE); if (!flt->mantissa) Fatal(FATAL_NOMEM); + /* allocate and initialize calculation variables */ + operand[0] = BitVector_Create(MANT_BITS, TRUE); + if (!operand[0]) + Fatal(FATAL_NOMEM); + operand[1] = BitVector_Create(MANT_BITS, TRUE); + if (!operand[1]) + Fatal(FATAL_NOMEM); + dec_exponent = 0; + sig_digits = 0; + decimal_pt = 1; + + /* set initial flags to 0 */ + flt->flags = 0; + /* check for + or - character and skip */ if (*str == '-') { flt->sign = 1; @@ -71,6 +331,153 @@ floatnum_new(char *str) } else flt->sign = 0; + /* eliminate any leading zeros (which do not count as significant digits) */ + while (*str == '0') + str++; + + /* When we reach the end of the leading zeros, first check for a decimal + * point. If the number is of the form "0---0.0000" we need to get rid + * of the zeros after the decimal point and not count them as significant + * digits. + */ + if (*str == '.') { + str++; + while (*str == '0') { + str++; + dec_exponent--; + } + } else { + /* The number is of the form "yyy.xxxx" (where y <> 0). */ + while (isdigit(*str)) { + /* See if we've processed more than the max significant digits: */ + if (sig_digits < MANT_SIGDIGITS) { + /* Multiply mantissa by 10 [x = (x<<1)+(x<<3)] */ + BitVector_shift_left(flt->mantissa, 0); + BitVector_Copy(operand[0], flt->mantissa); + BitVector_Move_Left(flt->mantissa, 2); + carry = 0; + BitVector_add(operand[1], operand[0], flt->mantissa, &carry); + + /* Add in current digit */ + BitVector_Empty(operand[0]); + BitVector_Chunk_Store(operand[0], 4, 0, *str-'0'); + carry = 0; + BitVector_add(flt->mantissa, operand[1], operand[0], &carry); + } else { + /* Can't integrate more digits with mantissa, so instead just + * raise by a power of ten. + */ + dec_exponent++; + } + sig_digits++; + str++; + } + + if (*str == '.') + str++; + else + decimal_pt = 0; + } + + if (decimal_pt) { + /* Process the digits to the right of the decimal point. */ + while (isdigit(*str)) { + /* See if we've processed more than 19 significant digits: */ + if (sig_digits < 19) { + /* Raise by a power of ten */ + dec_exponent--; + + /* Multiply mantissa by 10 [x = (x<<1)+(x<<3)] */ + BitVector_shift_left(flt->mantissa, 0); + BitVector_Copy(operand[0], flt->mantissa); + BitVector_Move_Left(flt->mantissa, 2); + carry = 0; + BitVector_add(operand[1], operand[0], flt->mantissa, &carry); + + /* Add in current digit */ + BitVector_Empty(operand[0]); + BitVector_Chunk_Store(operand[0], 4, 0, *str-'0'); + carry = 0; + BitVector_add(flt->mantissa, operand[1], operand[0], &carry); + } + sig_digits++; + str++; + } + } + + if (*str == 'e' || *str == 'E') { + str++; + /* We just saw the "E" character, now read in the exponent value and + * add it into dec_exponent. + */ + dec_exp_add = 0; + sscanf(str, "%d", &dec_exp_add); + dec_exponent += dec_exp_add; + } + + /* Normalize the number, checking for 0 first. */ + if (BitVector_is_empty(flt->mantissa)) { + /* Mantissa is 0, zero exponent too. */ + flt->exponent = 0; + /* Set zero flag so output functions don't see 0 value as underflow. */ + flt->flags |= FLAG_ISZERO; + /* Free calculation variables and return. */ + BitVector_Destroy(operand[1]); + BitVector_Destroy(operand[0]); + + return flt; + } + flt->exponent = 0x7FFF+(MANT_BITS-1); /* Exponent if already norm. */ + floatnum_normalize(flt); + + /* The number is normalized. Now multiply by 10 the number of times + * specified in DecExponent. This uses the power of ten tables to speed + * up this operation (and make it more accurate). + */ + if (dec_exponent > 0) { + POT_index = 0; + /* Until we hit 1.0 or finish exponent or overflow */ + while ((POT_index < 14) && (dec_exponent != 0) && + (flt->exponent != EXP_INF)) { + /* Find the first power of ten in the table which is just less than + * the exponent. + */ + while (dec_exponent < POT_TableP[POT_index].dec_exponent) + POT_index++; + + if (POT_index < 14) { + /* Subtract out what we're multiplying in from exponent */ + dec_exponent -= POT_TableP[POT_index].dec_exponent; + + /* Multiply by current power of 10 */ + floatnum_mul(flt, &POT_TableP[POT_index].f); + } + } + } else if (dec_exponent < 0) { + POT_index = 0; + /* Until we hit 1.0 or finish exponent or underflow */ + while ((POT_index < 14) && (dec_exponent != 0) && + (flt->exponent != EXP_ZERO)) { + /* Find the first power of ten in the table which is just less than + * the exponent. + */ + while (dec_exponent > POT_TableN[POT_index].dec_exponent) + POT_index++; + + if (POT_index < 14) { + /* Subtract out what we're multiplying in from exponent */ + dec_exponent -= POT_TableN[POT_index].dec_exponent; + + /* Multiply by current power of 10 */ + floatnum_mul(flt, &POT_TableN[POT_index].f); + } + } + } + + /* Round the result. (Don't round underflow or overflow). */ + if ((flt->exponent != EXP_INF) && (flt->exponent != EXP_ZERO)) + BitVector_increment(flt->mantissa); + return flt; } @@ -86,75 +493,114 @@ floatnum_get_int(unsigned long *ret_val, const floatnum *flt) return 0; } -/* IEEE-754 (Intel) "single precision" format: - * 32 bits: - * Bit 31 Bit 22 Bit 0 - * | | | - * seeeeeee emmmmmmm mmmmmmmm mmmmmmmm +/* Function used by single, double, and extended conversion routines to actually + * perform the conversion. * - * e = bias 127 exponent - * s = sign bit - * m = mantissa bits, bit 23 is an implied one bit. + * ptr -> the array to return the little-endian floating point value into. + * flt -> the floating point value to convert. + * byte_size -> the size in bytes of the output format. + * mant_bits -> the size in bits of the output mantissa. + * implicit1 -> does the output format have an implicit 1? 1=yes, 0=no. + * exp_bits -> the size in bits of the output exponent. + * + * Returns 0 on success, 1 if overflow, -1 if underflow. */ -int -floatnum_get_single(unsigned char *ptr, const floatnum *flt) +static int +floatnum_get_common(unsigned char *ptr, const floatnum *flt, int byte_size, + int mant_bits, int implicit1, int exp_bits) { - unsigned long exponent = flt->exponent; + int exponent = flt->exponent; unsigned int *output; unsigned char *buf; unsigned int len; + unsigned int overflow = 0, underflow = 0, retval = 0; + int exp_bias = (1<<(exp_bits-1))-1; + int exp_inf = (1<mantissa, 0, 40, 23); + BitVector_Interval_Copy(output, flt->mantissa, 0, + (MANT_BITS-implicit1)-mant_bits, mant_bits); /* round mantissa */ - BitVector_increment(output); + if (BitVector_bit_test(flt->mantissa, (MANT_BITS-implicit1)-(mant_bits+1))) + BitVector_increment(output); - if (BitVector_bit_test(output, 23)) { + if (BitVector_bit_test(output, mant_bits)) { /* overflowed, so divide mantissa by 2 (shift right) */ BitVector_shift_right(output, 0); /* and up the exponent (checking for overflow) */ - if (exponent+1 >= 0x10000) { - BitVector_Destroy(output); - return 1; /* exponent too large */ - } - exponent++; + if (exponent+1 >= EXP_INF) + overflow = 1; + else + exponent++; } - /* adjust the exponent to bias 127 */ - exponent -= 32767-127; - if (exponent >= 256) { - BitVector_Destroy(output); - return 1; /* exponent too large */ + /* adjust the exponent to the output bias, checking for overflow */ + exponent -= EXP_BIAS-exp_bias; + if (exponent >= exp_inf) + overflow = 1; + else if (exponent <= 0) + underflow = 1; + + /* underflow and overflow both set!? */ + if (underflow && overflow) + InternalError(__LINE__, __FILE__, _("Both underflow and overflow set")); + + /* check for underflow or overflow and set up appropriate output */ + if (underflow) { + BitVector_Empty(output); + exponent = 0; + if (!(flt->flags & FLAG_ISZERO)) + retval = -1; + } else if (overflow) { + BitVector_Empty(output); + exponent = exp_inf; + retval = 1; } /* move exponent into place */ - BitVector_Chunk_Store(output, 8, 23, exponent); + BitVector_Chunk_Store(output, exp_bits, mant_bits, exponent); /* merge in sign bit */ - BitVector_Bit_Copy(output, 31, flt->sign); + BitVector_Bit_Copy(output, byte_size*8-1, flt->sign); /* get little-endian bytes */ buf = BitVector_Block_Read(output, &len); if (!buf) Fatal(FATAL_NOMEM); - if (len != 4) + if (len < byte_size) InternalError(__LINE__, __FILE__, - _("Length of 32-bit BitVector not 4")); + _("Byte length of BitVector does not match bit length")); /* copy to output */ - memcpy(ptr, buf, 4*sizeof(unsigned char)); + memcpy(ptr, buf, byte_size*sizeof(unsigned char)); /* free allocated resources */ free(buf); BitVector_Destroy(output); - return 0; + return retval; +} + +/* IEEE-754 (Intel) "single precision" format: + * 32 bits: + * Bit 31 Bit 22 Bit 0 + * | | | + * seeeeeee emmmmmmm mmmmmmmm mmmmmmmm + * + * e = bias 127 exponent + * s = sign bit + * m = mantissa bits, bit 23 is an implied one bit. + */ +int +floatnum_get_single(unsigned char *ptr, const floatnum *flt) +{ + return floatnum_get_common(ptr, flt, 4, 23, 1, 8); } /* IEEE-754 (Intel) "double precision" format: @@ -170,62 +616,7 @@ floatnum_get_single(unsigned char *ptr, const floatnum *flt) int floatnum_get_double(unsigned char *ptr, const floatnum *flt) { - unsigned long exponent = flt->exponent; - unsigned int *output; - unsigned char *buf; - unsigned int len; - - output = BitVector_Create(64, TRUE); - if (!output) - Fatal(FATAL_NOMEM); - - /* copy mantissa */ - BitVector_Interval_Copy(output, flt->mantissa, 0, 11, 52); - - /* round mantissa */ - BitVector_increment(output); - - if (BitVector_bit_test(output, 52)) { - /* overflowed, so divide mantissa by 2 (shift right) */ - BitVector_shift_right(output, 0); - /* and up the exponent (checking for overflow) */ - if (exponent+1 >= 0x10000) { - BitVector_Destroy(output); - return 1; /* exponent too large */ - } - exponent++; - } - - /* adjust the exponent to bias 1023 */ - exponent -= 32767-1023; - if (exponent >= 2048) { - BitVector_Destroy(output); - return 1; /* exponent too large */ - } - - /* move exponent into place */ - BitVector_Chunk_Store(output, 11, 52, exponent); - - /* merge in sign bit */ - BitVector_Bit_Copy(output, 63, flt->sign); - - /* get little-endian bytes */ - buf = BitVector_Block_Read(output, &len); - if (!buf) - Fatal(FATAL_NOMEM); - if (len != 8) - InternalError(__LINE__, __FILE__, - _("Length of 64-bit BitVector not 8")); - - /* copy to output */ - memcpy(ptr, buf, 8*sizeof(unsigned char)); - - /* free allocated resources */ - free(buf); - - BitVector_Destroy(output); - - return 0; + return floatnum_get_common(ptr, flt, 8, 52, 1, 11); } /* IEEE-754 (Intel) "extended precision" format: @@ -234,46 +625,18 @@ floatnum_get_double(unsigned char *ptr, const floatnum *flt) * | | | * seeeeeee eeeeeeee mmmmmmmm m...m m...m m...m m...m m...m * - * e = bias 16384 exponent + * e = bias 16383 exponent * m = 64 bit mantissa with NO implied bit! * s = sign (for mantissa) */ int floatnum_get_extended(unsigned char *ptr, const floatnum *flt) { - unsigned short exponent = flt->exponent; - unsigned char *buf; - unsigned int len; - - /* Adjust the exponent to bias 16384 */ - exponent -= 0x4000; - if (exponent >= 0x8000) - return 1; /* exponent too large */ - - /* get little-endian bytes */ - buf = BitVector_Block_Read(flt->mantissa, &len); - if (!buf) - Fatal(FATAL_NOMEM); - if (len != 8) - InternalError(__LINE__, __FILE__, - _("Length of 64-bit BitVector not 8")); - - /* copy to output */ - memcpy(ptr, buf, 8*sizeof(unsigned char)); - - /* Save exponent and sign in proper location */ - SAVE_SHORT(&ptr[8], exponent); - if (flt->sign) - ptr[9] |= 0x80; - - /* free allocated resources */ - free(buf); - - return 0; + return floatnum_get_common(ptr, flt, 10, 64, 0, 15); } void floatnum_print(const floatnum *flt) { - /* TODO */ + /* TODO */ } diff --git a/libyasm/floatnum.h b/libyasm/floatnum.h index 8ac8bf6f..e40099c4 100644 --- a/libyasm/floatnum.h +++ b/libyasm/floatnum.h @@ -24,20 +24,20 @@ #ifndef YASM_FLOATNUM_H #define YASM_FLOATNUM_H -/* 88-bit internal floating point format: - * xxxxxxxs eeeeeeee eeeeeeee m..m m..m m..m m..m m..m m..m m..m m..m - * Sign exponent mantissa (64 bits) - * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0| - * 63 55 47 39 31 23 15 7 0 +/* 97-bit internal floating point format: + * xxxxxxxs eeeeeeee eeeeeeee m.....................................m + * Sign exponent mantissa (80 bits) + * 79 0 * * Only L.O. bit of Sign byte is significant. The rest is garbage. - * Exponent is bias 32767 exponent. + * Exponent is bias 32767. * Mantissa does NOT have an implied one bit (it's explicit). */ typedef struct floatnum_s { unsigned int *mantissa; /* Allocated to 64 bits */ unsigned short exponent; unsigned char sign; + unsigned char flags; } floatnum; floatnum *floatnum_new(char *str); diff --git a/libyasm/tests/floatnum_test.c b/libyasm/tests/floatnum_test.c index cc093631..d94797c6 100644 --- a/libyasm/tests/floatnum_test.c +++ b/libyasm/tests/floatnum_test.c @@ -13,7 +13,7 @@ void get_family_setup(void) { flt = malloc(sizeof(floatnum)); - flt->mantissa = BitVector_Create(64, TRUE); + flt->mantissa = BitVector_Create(80, TRUE); } void get_family_teardown(void) @@ -25,12 +25,12 @@ void get_family_teardown(void) void pi_setup(void) { /* test value: 3.141592653589793 */ - /* 80-bit little endian hex: E9 BD 68 21 A2 DA 0F C9 00 40 */ - unsigned char val[] = {0xE9, 0xBD, 0x68, 0x21, 0xA2, 0xDA, 0x0F, 0xC9}; + /* 80-bit little endian mantissa: C6 0D E9 BD 68 21 A2 DA 0F C9 */ + unsigned char val[] = {0xC6, 0x0D, 0xE9, 0xBD, 0x68, 0x21, 0xA2, 0xDA, 0x0F, 0xC9}; unsigned char sign = 0; unsigned short exp = 32767 + 1; - BitVector_Block_Store(flt->mantissa, val, 8); + BitVector_Block_Store(flt->mantissa, val, 10); flt->sign = sign; flt->exponent = exp; } diff --git a/src/floatnum.c b/src/floatnum.c index 50656aa6..b2303758 100644 --- a/src/floatnum.c +++ b/src/floatnum.c @@ -28,6 +28,7 @@ #include "util.h" #include +#include #ifdef STDC_HEADERS # include @@ -50,17 +51,276 @@ RCSID("$IdPath$"); +/* constants describing parameters of internal floating point format */ +#define MANT_BITS 80 +#define MANT_BYTES 10 +#define MANT_SIGDIGITS 24 +#define EXP_BIAS 0x7FFF +#define EXP_INF 0xFFFF +#define EXP_MAX 0xFFFE +#define EXP_MIN 1 +#define EXP_ZERO 0 + +/* Flag settings for flags field */ +#define FLAG_ISZERO 1<<0 + +/* Note this structure integrates the floatnum structure */ +typedef struct POT_Entry_s { + floatnum f; + int dec_exponent; +} POT_Entry; + +/* "Source" for POT_Entry. */ +typedef struct POT_Entry_Source_s { + unsigned char mantissa[MANT_BYTES]; /* little endian mantissa */ + unsigned short exponent; /* Bias 32767 exponent */ +} POT_Entry_Source; + +/* Power of ten tables used by the floating point I/O routines. + * The POT_Table? arrays are built from the POT_Table?_Source arrays at + * runtime by POT_Table_Init(). + */ + +/* This table contains the powers of ten raised to negative powers of two: + * + * entry[12-n] = 10 ** (-2 ** n) for 0 <= n <= 12. + * entry[13] = 1.0 + */ +static POT_Entry *POT_TableN = (POT_Entry *)NULL; +static POT_Entry_Source POT_TableN_Source[] = { + {{0xe3,0x2d,0xde,0x9f,0xce,0xd2,0xc8,0x04,0xdd,0xa6},0x4ad8}, /* 1e-4096 */ + {{0x25,0x49,0xe4,0x2d,0x36,0x34,0x4f,0x53,0xae,0xce},0x656b}, /* 1e-2048 */ + {{0xa6,0x87,0xbd,0xc0,0x57,0xda,0xa5,0x82,0xa6,0xa2},0x72b5}, /* 1e-1024 */ + {{0x33,0x71,0x1c,0xd2,0x23,0xdb,0x32,0xee,0x49,0x90},0x795a}, /* 1e-512 */ + {{0x91,0xfa,0x39,0x19,0x7a,0x63,0x25,0x43,0x31,0xc0},0x7cac}, /* 1e-256 */ + {{0x7d,0xac,0xa0,0xe4,0xbc,0x64,0x7c,0x46,0xd0,0xdd},0x7e55}, /* 1e-128 */ + {{0x24,0x3f,0xa5,0xe9,0x39,0xa5,0x27,0xea,0x7f,0xa8},0x7f2a}, /* 1e-64 */ + {{0xde,0x67,0xba,0x94,0x39,0x45,0xad,0x1e,0xb1,0xcf},0x7f94}, /* 1e-32 */ + {{0x2f,0x4c,0x5b,0xe1,0x4d,0xc4,0xbe,0x94,0x95,0xe6},0x7fc9}, /* 1e-16 */ + {{0xc2,0xfd,0xfc,0xce,0x61,0x84,0x11,0x77,0xcc,0xab},0x7fe4}, /* 1e-8 */ + {{0xc3,0xd3,0x2b,0x65,0x19,0xe2,0x58,0x17,0xb7,0xd1},0x7ff1}, /* 1e-4 */ + {{0x71,0x3d,0x0a,0xd7,0xa3,0x70,0x3d,0x0a,0xd7,0xa3},0x7ff8}, /* 1e-2 */ + {{0xcd,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc},0x7ffb}, /* 1e-1 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80},0x7fff}, /* 1e-0 */ +}; + +/* This table contains the powers of ten raised to positive powers of two: + * + * entry[12-n] = 10 ** (2 ** n) for 0 <= n <= 12. + * entry[13] = 1.0 + * entry[-1] = entry[0]; + * + * There is a -1 entry since it is possible for the algorithm to back up + * before the table. This -1 entry is created at runtime by duplicating the + * 0 entry. + */ +static POT_Entry *POT_TableP; +static POT_Entry_Source POT_TableP_Source[] = { + {{0x4c,0xc9,0x9a,0x97,0x20,0x8a,0x02,0x52,0x60,0xc4},0xb525}, /* 1e+4096 */ + {{0x4d,0xa7,0xe4,0x5d,0x3d,0xc5,0x5d,0x3b,0x8b,0x9e},0x9a92}, /* 1e+2048 */ + {{0x0d,0x65,0x17,0x0c,0x75,0x81,0x86,0x75,0x76,0xc9},0x8d48}, /* 1e+1024 */ + {{0x65,0xcc,0xc6,0x91,0x0e,0xa6,0xae,0xa0,0x19,0xe3},0x86a3}, /* 1e+512 */ + {{0xbc,0xdd,0x8d,0xde,0xf9,0x9d,0xfb,0xeb,0x7e,0xaa},0x8351}, /* 1e+256 */ + {{0x6f,0xc6,0xdf,0x8c,0xe9,0x80,0xc9,0x47,0xba,0x93},0x81a8}, /* 1e+128 */ + {{0xbf,0x3c,0xd5,0xa6,0xcf,0xff,0x49,0x1f,0x78,0xc2},0x80d3}, /* 1e+64 */ + {{0x20,0xf0,0x9d,0xb5,0x70,0x2b,0xa8,0xad,0xc5,0x9d},0x8069}, /* 1e+32 */ + {{0x00,0x00,0x00,0x00,0x00,0x04,0xbf,0xc9,0x1b,0x8e},0x8034}, /* 1e+16 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0xbc,0xbe},0x8019}, /* 1e+8 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x9c},0x800c}, /* 1e+4 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc8},0x8005}, /* 1e+2 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xa0},0x8002}, /* 1e+1 */ + {{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80},0x7fff}, /* 1e+0 */ +}; + +static void +POT_Table_Init_Entry(POT_Entry *e, POT_Entry_Source *s, int dec_exp) +{ + /* Save decimal exponent */ + e->dec_exponent = dec_exp; + + /* Initialize mantissa */ + e->f.mantissa = BitVector_Create(MANT_BITS, FALSE); + if (!e->f.mantissa) + Fatal(FATAL_NOMEM); + BitVector_Block_Store(e->f.mantissa, s->mantissa, MANT_BYTES); + + /* Initialize exponent */ + e->f.exponent = s->exponent; + + /* Set sign to 0 (positive) */ + e->f.sign = 0; + + /* Clear flags */ + e->f.flags = 0; +} + +static void +POT_Table_Init(void) +{ + unsigned int dec_exp = 1; + int i; + + /* Allocate space for two POT tables */ + POT_TableN = malloc(14*sizeof(POT_Entry)); + if (!POT_TableN) + Fatal(FATAL_NOMEM); + POT_TableP = malloc(15*sizeof(POT_Entry)); /* note 1 extra for -1 */ + if (!POT_TableP) + Fatal(FATAL_NOMEM); + + /* Initialize entry[0..12] */ + for (i=12; i>=0; i--) { + POT_Table_Init_Entry(&POT_TableN[i], &POT_TableN_Source[i], 0-dec_exp); + POT_Table_Init_Entry(&POT_TableP[i+1], &POT_TableP_Source[i], dec_exp); + dec_exp *= 2; /* Update decimal exponent */ + } + + /* Initialize entry[13] */ + POT_Table_Init_Entry(&POT_TableN[13], &POT_TableN_Source[13], 0); + POT_Table_Init_Entry(&POT_TableP[14], &POT_TableP_Source[13], 0); + + /* Initialize entry[-1] for POT_TableP */ + POT_Table_Init_Entry(&POT_TableP[0], &POT_TableP_Source[0], 4096); + + /* Offset POT_TableP so that [0] becomes [-1] */ + POT_TableP++; +} + +static void +floatnum_normalize(floatnum *flt) +{ + int norm_amt; + + if (BitVector_is_empty(flt->mantissa)) { + flt->exponent = 0; + return; + } + + /* Look for the highest set bit, shift to make it the MSB, and adjust + * exponent. Don't let exponent go negative. */ + norm_amt = (MANT_BITS-1)-Set_Max(flt->mantissa); + if (norm_amt > flt->exponent) + norm_amt = flt->exponent; + BitVector_Move_Left(flt->mantissa, norm_amt); + flt->exponent -= norm_amt; +} + +/* acc *= op */ +static void +floatnum_mul(floatnum *acc, const floatnum *op) +{ + int exp; + unsigned int *product, *op1, *op2; + int norm_amt; + + /* Compute the new sign */ + acc->sign ^= op->sign; + + /* Check for multiply by 0 */ + if (BitVector_is_empty(acc->mantissa) || BitVector_is_empty(op->mantissa)) { + BitVector_Empty(acc->mantissa); + acc->exponent = EXP_ZERO; + return; + } + + /* Add exponents, checking for overflow/underflow. */ + exp = (((int)acc->exponent)-EXP_BIAS) + (((int)op->exponent)-EXP_BIAS); + exp += EXP_BIAS; + if (exp > EXP_MAX) { + /* Overflow; return infinity. */ + BitVector_Empty(acc->mantissa); + acc->exponent = EXP_INF; + return; + } else if (exp < EXP_MIN) { + /* Underflow; return zero. */ + BitVector_Empty(acc->mantissa); + acc->exponent = EXP_ZERO; + return; + } + + /* Add one to the final exponent, as the multiply shifts one extra time. */ + acc->exponent = exp+1; + + /* Allocate space for the multiply result */ + product = BitVector_Create((MANT_BITS+1)*2, FALSE); + if (!product) + Fatal(FATAL_NOMEM); + + /* Allocate 1-bit-longer fields to force the operands to be unsigned */ + op1 = BitVector_Create(MANT_BITS+1, FALSE); + if (!op1) + Fatal(FATAL_NOMEM); + op2 = BitVector_Create(MANT_BITS+1, FALSE); + if (!op2) + Fatal(FATAL_NOMEM); + + /* Make the operands unsigned after copying from original operands */ + BitVector_Copy(op1, acc->mantissa); + BitVector_MSB(op1, 0); + BitVector_Copy(op2, op->mantissa); + BitVector_MSB(op2, 0); + + /* Compute the product of the mantissas */ + BitVector_Multiply(product, op1, op2); + + /* Normalize the product. Note: we know the product is non-zero because + * both of the original operands were non-zero. + * + * Look for the highest set bit, shift to make it the MSB, and adjust + * exponent. Don't let exponent go negative. + */ + norm_amt = (MANT_BITS*2-1)-Set_Max(product); + if (norm_amt > acc->exponent) + norm_amt = acc->exponent; + BitVector_Move_Left(product, norm_amt); + acc->exponent -= norm_amt; + + /* Store the highest bits of the result */ + BitVector_Interval_Copy(acc->mantissa, product, 0, MANT_BITS, MANT_BITS); + + /* Free allocated variables */ + BitVector_Destroy(product); + BitVector_Destroy(op1); + BitVector_Destroy(op2); +} + floatnum * floatnum_new(char *str) { - floatnum *flt = malloc(sizeof(floatnum)); + floatnum *flt; + int dec_exponent, dec_exp_add; /* decimal (powers of 10) exponent */ + int POT_index; + unsigned int *operand[2]; + int sig_digits; + int decimal_pt; + boolean carry; + + /* Initialize POT tables if necessary */ + if (!POT_TableN) + POT_Table_Init(); + + flt = malloc(sizeof(floatnum)); if (!flt) Fatal(FATAL_NOMEM); - flt->mantissa = BitVector_Create(64, TRUE); + flt->mantissa = BitVector_Create(MANT_BITS, TRUE); if (!flt->mantissa) Fatal(FATAL_NOMEM); + /* allocate and initialize calculation variables */ + operand[0] = BitVector_Create(MANT_BITS, TRUE); + if (!operand[0]) + Fatal(FATAL_NOMEM); + operand[1] = BitVector_Create(MANT_BITS, TRUE); + if (!operand[1]) + Fatal(FATAL_NOMEM); + dec_exponent = 0; + sig_digits = 0; + decimal_pt = 1; + + /* set initial flags to 0 */ + flt->flags = 0; + /* check for + or - character and skip */ if (*str == '-') { flt->sign = 1; @@ -71,6 +331,153 @@ floatnum_new(char *str) } else flt->sign = 0; + /* eliminate any leading zeros (which do not count as significant digits) */ + while (*str == '0') + str++; + + /* When we reach the end of the leading zeros, first check for a decimal + * point. If the number is of the form "0---0.0000" we need to get rid + * of the zeros after the decimal point and not count them as significant + * digits. + */ + if (*str == '.') { + str++; + while (*str == '0') { + str++; + dec_exponent--; + } + } else { + /* The number is of the form "yyy.xxxx" (where y <> 0). */ + while (isdigit(*str)) { + /* See if we've processed more than the max significant digits: */ + if (sig_digits < MANT_SIGDIGITS) { + /* Multiply mantissa by 10 [x = (x<<1)+(x<<3)] */ + BitVector_shift_left(flt->mantissa, 0); + BitVector_Copy(operand[0], flt->mantissa); + BitVector_Move_Left(flt->mantissa, 2); + carry = 0; + BitVector_add(operand[1], operand[0], flt->mantissa, &carry); + + /* Add in current digit */ + BitVector_Empty(operand[0]); + BitVector_Chunk_Store(operand[0], 4, 0, *str-'0'); + carry = 0; + BitVector_add(flt->mantissa, operand[1], operand[0], &carry); + } else { + /* Can't integrate more digits with mantissa, so instead just + * raise by a power of ten. + */ + dec_exponent++; + } + sig_digits++; + str++; + } + + if (*str == '.') + str++; + else + decimal_pt = 0; + } + + if (decimal_pt) { + /* Process the digits to the right of the decimal point. */ + while (isdigit(*str)) { + /* See if we've processed more than 19 significant digits: */ + if (sig_digits < 19) { + /* Raise by a power of ten */ + dec_exponent--; + + /* Multiply mantissa by 10 [x = (x<<1)+(x<<3)] */ + BitVector_shift_left(flt->mantissa, 0); + BitVector_Copy(operand[0], flt->mantissa); + BitVector_Move_Left(flt->mantissa, 2); + carry = 0; + BitVector_add(operand[1], operand[0], flt->mantissa, &carry); + + /* Add in current digit */ + BitVector_Empty(operand[0]); + BitVector_Chunk_Store(operand[0], 4, 0, *str-'0'); + carry = 0; + BitVector_add(flt->mantissa, operand[1], operand[0], &carry); + } + sig_digits++; + str++; + } + } + + if (*str == 'e' || *str == 'E') { + str++; + /* We just saw the "E" character, now read in the exponent value and + * add it into dec_exponent. + */ + dec_exp_add = 0; + sscanf(str, "%d", &dec_exp_add); + dec_exponent += dec_exp_add; + } + + /* Normalize the number, checking for 0 first. */ + if (BitVector_is_empty(flt->mantissa)) { + /* Mantissa is 0, zero exponent too. */ + flt->exponent = 0; + /* Set zero flag so output functions don't see 0 value as underflow. */ + flt->flags |= FLAG_ISZERO; + /* Free calculation variables and return. */ + BitVector_Destroy(operand[1]); + BitVector_Destroy(operand[0]); + + return flt; + } + flt->exponent = 0x7FFF+(MANT_BITS-1); /* Exponent if already norm. */ + floatnum_normalize(flt); + + /* The number is normalized. Now multiply by 10 the number of times + * specified in DecExponent. This uses the power of ten tables to speed + * up this operation (and make it more accurate). + */ + if (dec_exponent > 0) { + POT_index = 0; + /* Until we hit 1.0 or finish exponent or overflow */ + while ((POT_index < 14) && (dec_exponent != 0) && + (flt->exponent != EXP_INF)) { + /* Find the first power of ten in the table which is just less than + * the exponent. + */ + while (dec_exponent < POT_TableP[POT_index].dec_exponent) + POT_index++; + + if (POT_index < 14) { + /* Subtract out what we're multiplying in from exponent */ + dec_exponent -= POT_TableP[POT_index].dec_exponent; + + /* Multiply by current power of 10 */ + floatnum_mul(flt, &POT_TableP[POT_index].f); + } + } + } else if (dec_exponent < 0) { + POT_index = 0; + /* Until we hit 1.0 or finish exponent or underflow */ + while ((POT_index < 14) && (dec_exponent != 0) && + (flt->exponent != EXP_ZERO)) { + /* Find the first power of ten in the table which is just less than + * the exponent. + */ + while (dec_exponent > POT_TableN[POT_index].dec_exponent) + POT_index++; + + if (POT_index < 14) { + /* Subtract out what we're multiplying in from exponent */ + dec_exponent -= POT_TableN[POT_index].dec_exponent; + + /* Multiply by current power of 10 */ + floatnum_mul(flt, &POT_TableN[POT_index].f); + } + } + } + + /* Round the result. (Don't round underflow or overflow). */ + if ((flt->exponent != EXP_INF) && (flt->exponent != EXP_ZERO)) + BitVector_increment(flt->mantissa); + return flt; } @@ -86,75 +493,114 @@ floatnum_get_int(unsigned long *ret_val, const floatnum *flt) return 0; } -/* IEEE-754 (Intel) "single precision" format: - * 32 bits: - * Bit 31 Bit 22 Bit 0 - * | | | - * seeeeeee emmmmmmm mmmmmmmm mmmmmmmm +/* Function used by single, double, and extended conversion routines to actually + * perform the conversion. * - * e = bias 127 exponent - * s = sign bit - * m = mantissa bits, bit 23 is an implied one bit. + * ptr -> the array to return the little-endian floating point value into. + * flt -> the floating point value to convert. + * byte_size -> the size in bytes of the output format. + * mant_bits -> the size in bits of the output mantissa. + * implicit1 -> does the output format have an implicit 1? 1=yes, 0=no. + * exp_bits -> the size in bits of the output exponent. + * + * Returns 0 on success, 1 if overflow, -1 if underflow. */ -int -floatnum_get_single(unsigned char *ptr, const floatnum *flt) +static int +floatnum_get_common(unsigned char *ptr, const floatnum *flt, int byte_size, + int mant_bits, int implicit1, int exp_bits) { - unsigned long exponent = flt->exponent; + int exponent = flt->exponent; unsigned int *output; unsigned char *buf; unsigned int len; + unsigned int overflow = 0, underflow = 0, retval = 0; + int exp_bias = (1<<(exp_bits-1))-1; + int exp_inf = (1<mantissa, 0, 40, 23); + BitVector_Interval_Copy(output, flt->mantissa, 0, + (MANT_BITS-implicit1)-mant_bits, mant_bits); /* round mantissa */ - BitVector_increment(output); + if (BitVector_bit_test(flt->mantissa, (MANT_BITS-implicit1)-(mant_bits+1))) + BitVector_increment(output); - if (BitVector_bit_test(output, 23)) { + if (BitVector_bit_test(output, mant_bits)) { /* overflowed, so divide mantissa by 2 (shift right) */ BitVector_shift_right(output, 0); /* and up the exponent (checking for overflow) */ - if (exponent+1 >= 0x10000) { - BitVector_Destroy(output); - return 1; /* exponent too large */ - } - exponent++; + if (exponent+1 >= EXP_INF) + overflow = 1; + else + exponent++; } - /* adjust the exponent to bias 127 */ - exponent -= 32767-127; - if (exponent >= 256) { - BitVector_Destroy(output); - return 1; /* exponent too large */ + /* adjust the exponent to the output bias, checking for overflow */ + exponent -= EXP_BIAS-exp_bias; + if (exponent >= exp_inf) + overflow = 1; + else if (exponent <= 0) + underflow = 1; + + /* underflow and overflow both set!? */ + if (underflow && overflow) + InternalError(__LINE__, __FILE__, _("Both underflow and overflow set")); + + /* check for underflow or overflow and set up appropriate output */ + if (underflow) { + BitVector_Empty(output); + exponent = 0; + if (!(flt->flags & FLAG_ISZERO)) + retval = -1; + } else if (overflow) { + BitVector_Empty(output); + exponent = exp_inf; + retval = 1; } /* move exponent into place */ - BitVector_Chunk_Store(output, 8, 23, exponent); + BitVector_Chunk_Store(output, exp_bits, mant_bits, exponent); /* merge in sign bit */ - BitVector_Bit_Copy(output, 31, flt->sign); + BitVector_Bit_Copy(output, byte_size*8-1, flt->sign); /* get little-endian bytes */ buf = BitVector_Block_Read(output, &len); if (!buf) Fatal(FATAL_NOMEM); - if (len != 4) + if (len < byte_size) InternalError(__LINE__, __FILE__, - _("Length of 32-bit BitVector not 4")); + _("Byte length of BitVector does not match bit length")); /* copy to output */ - memcpy(ptr, buf, 4*sizeof(unsigned char)); + memcpy(ptr, buf, byte_size*sizeof(unsigned char)); /* free allocated resources */ free(buf); BitVector_Destroy(output); - return 0; + return retval; +} + +/* IEEE-754 (Intel) "single precision" format: + * 32 bits: + * Bit 31 Bit 22 Bit 0 + * | | | + * seeeeeee emmmmmmm mmmmmmmm mmmmmmmm + * + * e = bias 127 exponent + * s = sign bit + * m = mantissa bits, bit 23 is an implied one bit. + */ +int +floatnum_get_single(unsigned char *ptr, const floatnum *flt) +{ + return floatnum_get_common(ptr, flt, 4, 23, 1, 8); } /* IEEE-754 (Intel) "double precision" format: @@ -170,62 +616,7 @@ floatnum_get_single(unsigned char *ptr, const floatnum *flt) int floatnum_get_double(unsigned char *ptr, const floatnum *flt) { - unsigned long exponent = flt->exponent; - unsigned int *output; - unsigned char *buf; - unsigned int len; - - output = BitVector_Create(64, TRUE); - if (!output) - Fatal(FATAL_NOMEM); - - /* copy mantissa */ - BitVector_Interval_Copy(output, flt->mantissa, 0, 11, 52); - - /* round mantissa */ - BitVector_increment(output); - - if (BitVector_bit_test(output, 52)) { - /* overflowed, so divide mantissa by 2 (shift right) */ - BitVector_shift_right(output, 0); - /* and up the exponent (checking for overflow) */ - if (exponent+1 >= 0x10000) { - BitVector_Destroy(output); - return 1; /* exponent too large */ - } - exponent++; - } - - /* adjust the exponent to bias 1023 */ - exponent -= 32767-1023; - if (exponent >= 2048) { - BitVector_Destroy(output); - return 1; /* exponent too large */ - } - - /* move exponent into place */ - BitVector_Chunk_Store(output, 11, 52, exponent); - - /* merge in sign bit */ - BitVector_Bit_Copy(output, 63, flt->sign); - - /* get little-endian bytes */ - buf = BitVector_Block_Read(output, &len); - if (!buf) - Fatal(FATAL_NOMEM); - if (len != 8) - InternalError(__LINE__, __FILE__, - _("Length of 64-bit BitVector not 8")); - - /* copy to output */ - memcpy(ptr, buf, 8*sizeof(unsigned char)); - - /* free allocated resources */ - free(buf); - - BitVector_Destroy(output); - - return 0; + return floatnum_get_common(ptr, flt, 8, 52, 1, 11); } /* IEEE-754 (Intel) "extended precision" format: @@ -234,46 +625,18 @@ floatnum_get_double(unsigned char *ptr, const floatnum *flt) * | | | * seeeeeee eeeeeeee mmmmmmmm m...m m...m m...m m...m m...m * - * e = bias 16384 exponent + * e = bias 16383 exponent * m = 64 bit mantissa with NO implied bit! * s = sign (for mantissa) */ int floatnum_get_extended(unsigned char *ptr, const floatnum *flt) { - unsigned short exponent = flt->exponent; - unsigned char *buf; - unsigned int len; - - /* Adjust the exponent to bias 16384 */ - exponent -= 0x4000; - if (exponent >= 0x8000) - return 1; /* exponent too large */ - - /* get little-endian bytes */ - buf = BitVector_Block_Read(flt->mantissa, &len); - if (!buf) - Fatal(FATAL_NOMEM); - if (len != 8) - InternalError(__LINE__, __FILE__, - _("Length of 64-bit BitVector not 8")); - - /* copy to output */ - memcpy(ptr, buf, 8*sizeof(unsigned char)); - - /* Save exponent and sign in proper location */ - SAVE_SHORT(&ptr[8], exponent); - if (flt->sign) - ptr[9] |= 0x80; - - /* free allocated resources */ - free(buf); - - return 0; + return floatnum_get_common(ptr, flt, 10, 64, 0, 15); } void floatnum_print(const floatnum *flt) { - /* TODO */ + /* TODO */ } diff --git a/src/floatnum.h b/src/floatnum.h index 8ac8bf6f..e40099c4 100644 --- a/src/floatnum.h +++ b/src/floatnum.h @@ -24,20 +24,20 @@ #ifndef YASM_FLOATNUM_H #define YASM_FLOATNUM_H -/* 88-bit internal floating point format: - * xxxxxxxs eeeeeeee eeeeeeee m..m m..m m..m m..m m..m m..m m..m m..m - * Sign exponent mantissa (64 bits) - * | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0| - * 63 55 47 39 31 23 15 7 0 +/* 97-bit internal floating point format: + * xxxxxxxs eeeeeeee eeeeeeee m.....................................m + * Sign exponent mantissa (80 bits) + * 79 0 * * Only L.O. bit of Sign byte is significant. The rest is garbage. - * Exponent is bias 32767 exponent. + * Exponent is bias 32767. * Mantissa does NOT have an implied one bit (it's explicit). */ typedef struct floatnum_s { unsigned int *mantissa; /* Allocated to 64 bits */ unsigned short exponent; unsigned char sign; + unsigned char flags; } floatnum; floatnum *floatnum_new(char *str); diff --git a/src/tests/floatnum_test.c b/src/tests/floatnum_test.c index cc093631..d94797c6 100644 --- a/src/tests/floatnum_test.c +++ b/src/tests/floatnum_test.c @@ -13,7 +13,7 @@ void get_family_setup(void) { flt = malloc(sizeof(floatnum)); - flt->mantissa = BitVector_Create(64, TRUE); + flt->mantissa = BitVector_Create(80, TRUE); } void get_family_teardown(void) @@ -25,12 +25,12 @@ void get_family_teardown(void) void pi_setup(void) { /* test value: 3.141592653589793 */ - /* 80-bit little endian hex: E9 BD 68 21 A2 DA 0F C9 00 40 */ - unsigned char val[] = {0xE9, 0xBD, 0x68, 0x21, 0xA2, 0xDA, 0x0F, 0xC9}; + /* 80-bit little endian mantissa: C6 0D E9 BD 68 21 A2 DA 0F C9 */ + unsigned char val[] = {0xC6, 0x0D, 0xE9, 0xBD, 0x68, 0x21, 0xA2, 0xDA, 0x0F, 0xC9}; unsigned char sign = 0; unsigned short exp = 32767 + 1; - BitVector_Block_Store(flt->mantissa, val, 8); + BitVector_Block_Store(flt->mantissa, val, 10); flt->sign = sign; flt->exponent = exp; } -- 2.40.0