From c2f8c81af02519952a857baebb5c0c9048033009 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Tue, 26 Nov 2013 16:38:25 +0000 Subject: [PATCH] Issue #19638: Raise ValueError instead of crashing when converting billion character strings to float. --- Lib/test/test_strtod.py | 31 ++++++++++++++++++++ Misc/NEWS | 3 ++ Python/dtoa.c | 65 ++++++++++++++++++++++++++++++----------- 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_strtod.py b/Lib/test/test_strtod.py index 7bc595daf3..faf5b572a8 100644 --- a/Lib/test/test_strtod.py +++ b/Lib/test/test_strtod.py @@ -249,6 +249,37 @@ class StrtodTests(unittest.TestCase): else: assert False, "expected ValueError" + @test_support.bigmemtest(minsize=5 * test_support._1G, memuse=1) + def test_oversized_digit_strings(self, maxsize): + # Input string whose length doesn't fit in an INT. + s = "1." + "1" * int(2.2e9) + with self.assertRaises(ValueError): + float(s) + del s + + s = "0." + "0" * int(2.2e9) + "1" + with self.assertRaises(ValueError): + float(s) + del s + + def test_large_exponents(self): + # Verify that the clipping of the exponent in strtod doesn't affect the + # output values. + def positive_exp(n): + """ Long string with value 1.0 and exponent n""" + return '0.{}1e+{}'.format('0'*(n-1), n) + + def negative_exp(n): + """ Long string with value 1.0 and exponent -n""" + return '1{}e-{}'.format('0'*n, n) + + self.assertEqual(float(positive_exp(10000)), 1.0) + self.assertEqual(float(positive_exp(20000)), 1.0) + self.assertEqual(float(positive_exp(30000)), 1.0) + self.assertEqual(float(negative_exp(10000)), 1.0) + self.assertEqual(float(negative_exp(20000)), 1.0) + self.assertEqual(float(negative_exp(30000)), 1.0) + def test_particular(self): # inputs that produced crashes or incorrectly rounded results with # previous versions of dtoa.c, for various reasons diff --git a/Misc/NEWS b/Misc/NEWS index f845ae36b9..856daec7ec 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,9 @@ What's New in Python 2.7.7? Core and Builtins ----------------- +- Issue #19638: Fix possible crash / undefined behaviour from huge (more than 2 + billion characters) input strings in _Py_dg_strtod. + Library ------- diff --git a/Python/dtoa.c b/Python/dtoa.c index 44dc01f1d5..73e23af010 100644 --- a/Python/dtoa.c +++ b/Python/dtoa.c @@ -204,7 +204,24 @@ typedef union { double d; ULong L[2]; } U; MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP should fit into an int. */ #ifndef MAX_ABS_EXP -#define MAX_ABS_EXP 19999U +#define MAX_ABS_EXP 1100000000U +#endif +/* Bound on length of pieces of input strings in _Py_dg_strtod; specifically, + this is used to bound the total number of digits ignoring leading zeros and + the number of digits that follow the decimal point. Ideally, MAX_DIGITS + should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the + exponent clipping in _Py_dg_strtod can't affect the value of the output. */ +#ifndef MAX_DIGITS +#define MAX_DIGITS 1000000000U +#endif + +/* Guard against trying to use the above values on unusual platforms with ints + * of width less than 32 bits. */ +#if MAX_ABS_EXP > INT_MAX +#error "MAX_ABS_EXP should fit in an int" +#endif +#if MAX_DIGITS > INT_MAX +#error "MAX_DIGITS should fit in an int" #endif /* The following definition of Storeinc is appropriate for MIPS processors. @@ -1498,6 +1515,7 @@ _Py_dg_strtod(const char *s00, char **se) Long L; BCinfo bc; Bigint *bb, *bb1, *bd, *bd0, *bs, *delta; + size_t ndigits, fraclen; dval(&rv) = 0.; @@ -1520,39 +1538,52 @@ _Py_dg_strtod(const char *s00, char **se) c = *++s; lz = s != s1; - /* Point s0 at the first nonzero digit (if any). nd0 will be the position - of the point relative to s0. nd will be the total number of digits - ignoring leading zeros. */ + /* Point s0 at the first nonzero digit (if any). fraclen will be the + number of digits between the decimal point and the end of the + digit string. ndigits will be the total number of digits ignoring + leading zeros. */ s0 = s1 = s; while ('0' <= c && c <= '9') c = *++s; - nd0 = nd = s - s1; + ndigits = s - s1; + fraclen = 0; /* Parse decimal point and following digits. */ if (c == '.') { c = *++s; - if (!nd) { + if (!ndigits) { s1 = s; while (c == '0') c = *++s; lz = lz || s != s1; - nd0 -= s - s1; + fraclen += (s - s1); s0 = s; } s1 = s; while ('0' <= c && c <= '9') c = *++s; - nd += s - s1; + ndigits += s - s1; + fraclen += s - s1; + } + + /* Now lz is true if and only if there were leading zero digits, and + ndigits gives the total number of digits ignoring leading zeros. A + valid input must have at least one digit. */ + if (!ndigits && !lz) { + if (se) + *se = (char *)s00; + goto parse_error; } - /* Now lz is true if and only if there were leading zero digits, and nd - gives the total number of digits ignoring leading zeros. A valid input - must have at least one digit. */ - if (!nd && !lz) { + /* Range check ndigits and fraclen to make sure that they, and values + computed with them, can safely fit in an int. */ + if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) { if (se) *se = (char *)s00; goto parse_error; } + nd = (int)ndigits; + nd0 = (int)ndigits - (int)fraclen; /* Parse exponent. */ e = 0; @@ -1886,20 +1917,20 @@ _Py_dg_strtod(const char *s00, char **se) bd2++; /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1, - and bs == 1, so: + and bs == 1, so: tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5) srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2) - 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2) + 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2) - It follows that: + It follows that: M * tdv = bd * 2**bd2 * 5**bd5 M * srv = bb * 2**bb2 * 5**bb5 M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5 - for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but - this fact is not needed below.) + for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but + this fact is not needed below.) */ /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */ -- 2.50.1