From c0c00c38adbe1a9a2102239ebcaadb672fd9014b Mon Sep 17 00:00:00 2001 From: Steven D'Aprano Date: Tue, 1 Dec 2015 13:48:48 +1100 Subject: [PATCH] Fix for issue #25177 with the mean of very small and very large numbers. --- Lib/statistics.py | 185 +++++++++++------- Lib/test/test_statistics.py | 363 +++++++++++++++++++++++++++++++----- Misc/NEWS | 4 + 3 files changed, 433 insertions(+), 119 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index a56f91f27f..518f546544 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -104,6 +104,8 @@ import math from fractions import Fraction from decimal import Decimal +from itertools import groupby + # === Exceptions === @@ -115,86 +117,102 @@ class StatisticsError(ValueError): # === Private utilities === def _sum(data, start=0): - """_sum(data [, start]) -> value + """_sum(data [, start]) -> (type, sum, count) + + Return a high-precision sum of the given numeric data as a fraction, + together with the type to be converted to and the count of items. - Return a high-precision sum of the given numeric data. If optional - argument ``start`` is given, it is added to the total. If ``data`` is - empty, ``start`` (defaulting to 0) is returned. + If optional argument ``start`` is given, it is added to the total. + If ``data`` is empty, ``start`` (defaulting to 0) is returned. Examples -------- >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75) - 11.0 + (, Fraction(11, 1), 5) Some sources of round-off error will be avoided: >>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero. - 1000.0 + (, Fraction(1000, 1), 3000) Fractions and Decimals are also supported: >>> from fractions import Fraction as F >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) - Fraction(63, 20) + (, Fraction(63, 20), 4) >>> from decimal import Decimal as D >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] >>> _sum(data) - Decimal('0.6963') + (, Fraction(6963, 10000), 4) Mixed types are currently treated as an error, except that int is allowed. """ - # We fail as soon as we reach a value that is not an int or the type of - # the first value which is not an int. E.g. _sum([int, int, float, int]) - # is okay, but sum([int, int, float, Fraction]) is not. - allowed_types = set([int, type(start)]) + count = 0 n, d = _exact_ratio(start) - partials = {d: n} # map {denominator: sum of numerators} - # Micro-optimizations. - exact_ratio = _exact_ratio + partials = {d: n} partials_get = partials.get - # Add numerators for each denominator. - for x in data: - _check_type(type(x), allowed_types) - n, d = exact_ratio(x) - partials[d] = partials_get(d, 0) + n - # Find the expected result type. If allowed_types has only one item, it - # will be int; if it has two, use the one which isn't int. - assert len(allowed_types) in (1, 2) - if len(allowed_types) == 1: - assert allowed_types.pop() is int - T = int - else: - T = (allowed_types - set([int])).pop() + T = _coerce(int, type(start)) + for typ, values in groupby(data, type): + T = _coerce(T, typ) # or raise TypeError + for n,d in map(_exact_ratio, values): + count += 1 + partials[d] = partials_get(d, 0) + n if None in partials: - assert issubclass(T, (float, Decimal)) - assert not math.isfinite(partials[None]) - return T(partials[None]) - total = Fraction() - for d, n in sorted(partials.items()): - total += Fraction(n, d) - if issubclass(T, int): - assert total.denominator == 1 - return T(total.numerator) - if issubclass(T, Decimal): - return T(total.numerator)/total.denominator - return T(total) - - -def _check_type(T, allowed): - if T not in allowed: - if len(allowed) == 1: - allowed.add(T) - else: - types = ', '.join([t.__name__ for t in allowed] + [T.__name__]) - raise TypeError("unsupported mixed types: %s" % types) + # The sum will be a NAN or INF. We can ignore all the finite + # partials, and just look at this special one. + total = partials[None] + assert not _isfinite(total) + else: + # Sum all the partial sums using builtin sum. + # FIXME is this faster if we sum them in order of the denominator? + total = sum(Fraction(n, d) for d, n in sorted(partials.items())) + return (T, total, count) + + +def _isfinite(x): + try: + return x.is_finite() # Likely a Decimal. + except AttributeError: + return math.isfinite(x) # Coerces to float first. + + +def _coerce(T, S): + """Coerce types T and S to a common type, or raise TypeError. + + Coercion rules are currently an implementation detail. See the CoerceTest + test class in test_statistics for details. + """ + # See http://bugs.python.org/issue24068. + assert T is not bool, "initial type T is bool" + # If the types are the same, no need to coerce anything. Put this + # first, so that the usual case (no coercion needed) happens as soon + # as possible. + if T is S: return T + # Mixed int & other coerce to the other type. + if S is int or S is bool: return T + if T is int: return S + # If one is a (strict) subclass of the other, coerce to the subclass. + if issubclass(S, T): return S + if issubclass(T, S): return T + # Ints coerce to the other type. + if issubclass(T, int): return S + if issubclass(S, int): return T + # Mixed fraction & float coerces to float (or float subclass). + if issubclass(T, Fraction) and issubclass(S, float): + return S + if issubclass(T, float) and issubclass(S, Fraction): + return T + # Any other combination is disallowed. + msg = "don't know how to coerce %s and %s" + raise TypeError(msg % (T.__name__, S.__name__)) def _exact_ratio(x): - """Convert Real number x exactly to (numerator, denominator) pair. + """Return Real number x to exact (numerator, denominator) pair. >>> _exact_ratio(0.25) (1, 4) @@ -202,29 +220,31 @@ def _exact_ratio(x): x is expected to be an int, Fraction, Decimal or float. """ try: + # Optimise the common case of floats. We expect that the most often + # used numeric type will be builtin floats, so try to make this as + # fast as possible. + if type(x) is float: + return x.as_integer_ratio() try: - # int, Fraction + # x may be an int, Fraction, or Integral ABC. return (x.numerator, x.denominator) except AttributeError: - # float try: + # x may be a float subclass. return x.as_integer_ratio() except AttributeError: - # Decimal try: + # x may be a Decimal. return _decimal_to_ratio(x) except AttributeError: - msg = "can't convert type '{}' to numerator/denominator" - raise TypeError(msg.format(type(x).__name__)) from None + # Just give up? + pass except (OverflowError, ValueError): - # INF or NAN - if __debug__: - # Decimal signalling NANs cannot be converted to float :-( - if isinstance(x, Decimal): - assert not x.is_finite() - else: - assert not math.isfinite(x) + # float NAN or INF. + assert not math.isfinite(x) return (x, None) + msg = "can't convert type '{}' to numerator/denominator" + raise TypeError(msg.format(type(x).__name__)) # FIXME This is faster than Fraction.from_decimal, but still too slow. @@ -239,7 +259,7 @@ def _decimal_to_ratio(d): sign, digits, exp = d.as_tuple() if exp in ('F', 'n', 'N'): # INF, NAN, sNAN assert not d.is_finite() - raise ValueError + return (d, None) num = 0 for digit in digits: num = num*10 + digit @@ -253,6 +273,24 @@ def _decimal_to_ratio(d): return (num, den) +def _convert(value, T): + """Convert value to given numeric type T.""" + if type(value) is T: + # This covers the cases where T is Fraction, or where value is + # a NAN or INF (Decimal or float). + return value + if issubclass(T, int) and value.denominator != 1: + T = float + try: + # FIXME: what do we do if this overflows? + return T(value) + except TypeError: + if issubclass(T, Decimal): + return T(value.numerator)/T(value.denominator) + else: + raise + + def _counts(data): # Generate a table of sorted (value, frequency) pairs. table = collections.Counter(iter(data)).most_common() @@ -290,7 +328,9 @@ def mean(data): n = len(data) if n < 1: raise StatisticsError('mean requires at least one data point') - return _sum(data)/n + T, total, count = _sum(data) + assert count == n + return _convert(total/n, T) # FIXME: investigate ways to calculate medians without sorting? Quickselect? @@ -460,12 +500,14 @@ def _ss(data, c=None): """ if c is None: c = mean(data) - ss = _sum((x-c)**2 for x in data) + T, total, count = _sum((x-c)**2 for x in data) # The following sum should mathematically equal zero, but due to rounding # error may not. - ss -= _sum((x-c) for x in data)**2/len(data) - assert not ss < 0, 'negative sum of square deviations: %f' % ss - return ss + U, total2, count2 = _sum((x-c) for x in data) + assert T == U and count == count2 + total -= total2**2/len(data) + assert not total < 0, 'negative sum of square deviations: %f' % total + return (T, total) def variance(data, xbar=None): @@ -511,8 +553,8 @@ def variance(data, xbar=None): n = len(data) if n < 2: raise StatisticsError('variance requires at least two data points') - ss = _ss(data, xbar) - return ss/(n-1) + T, ss = _ss(data, xbar) + return _convert(ss/(n-1), T) def pvariance(data, mu=None): @@ -560,7 +602,8 @@ def pvariance(data, mu=None): if n < 1: raise StatisticsError('pvariance requires at least one data point') ss = _ss(data, mu) - return ss/n + T, ss = _ss(data, mu) + return _convert(ss/n, T) def stdev(data, xbar=None): diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 758a481fe7..0089ae8dc6 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -21,6 +21,37 @@ import statistics # === Helper functions and class === +def _nan_equal(a, b): + """Return True if a and b are both the same kind of NAN. + + >>> _nan_equal(Decimal('NAN'), Decimal('NAN')) + True + >>> _nan_equal(Decimal('sNAN'), Decimal('sNAN')) + True + >>> _nan_equal(Decimal('NAN'), Decimal('sNAN')) + False + >>> _nan_equal(Decimal(42), Decimal('NAN')) + False + + >>> _nan_equal(float('NAN'), float('NAN')) + True + >>> _nan_equal(float('NAN'), 0.5) + False + + >>> _nan_equal(float('NAN'), Decimal('NAN')) + False + + NAN payloads are not compared. + """ + if type(a) is not type(b): + return False + if isinstance(a, float): + return math.isnan(a) and math.isnan(b) + aexp = a.as_tuple()[2] + bexp = b.as_tuple()[2] + return (aexp == bexp) and (aexp in ('n', 'N')) # Both NAN or both sNAN. + + def _calc_errors(actual, expected): """Return the absolute and relative errors between two numbers. @@ -675,15 +706,60 @@ class ExactRatioTest(unittest.TestCase): self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000)) self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100)) + def test_inf(self): + INF = float("INF") + class MyFloat(float): + pass + class MyDecimal(Decimal): + pass + for inf in (INF, -INF): + for type_ in (float, MyFloat, Decimal, MyDecimal): + x = type_(inf) + ratio = statistics._exact_ratio(x) + self.assertEqual(ratio, (x, None)) + self.assertEqual(type(ratio[0]), type_) + self.assertTrue(math.isinf(ratio[0])) + + def test_float_nan(self): + NAN = float("NAN") + class MyFloat(float): + pass + for nan in (NAN, MyFloat(NAN)): + ratio = statistics._exact_ratio(nan) + self.assertTrue(math.isnan(ratio[0])) + self.assertIs(ratio[1], None) + self.assertEqual(type(ratio[0]), type(nan)) + + def test_decimal_nan(self): + NAN = Decimal("NAN") + sNAN = Decimal("sNAN") + class MyDecimal(Decimal): + pass + for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)): + ratio = statistics._exact_ratio(nan) + self.assertTrue(_nan_equal(ratio[0], nan)) + self.assertIs(ratio[1], None) + self.assertEqual(type(ratio[0]), type(nan)) + class DecimalToRatioTest(unittest.TestCase): # Test _decimal_to_ratio private function. - def testSpecialsRaise(self): - # Test that NANs and INFs raise ValueError. - # Non-special values are covered by _exact_ratio above. - for d in (Decimal('NAN'), Decimal('sNAN'), Decimal('INF')): - self.assertRaises(ValueError, statistics._decimal_to_ratio, d) + def test_infinity(self): + # Test that INFs are handled correctly. + inf = Decimal('INF') + self.assertEqual(statistics._decimal_to_ratio(inf), (inf, None)) + self.assertEqual(statistics._decimal_to_ratio(-inf), (-inf, None)) + + def test_nan(self): + # Test that NANs are handled correctly. + for nan in (Decimal('NAN'), Decimal('sNAN')): + num, den = statistics._decimal_to_ratio(nan) + # Because NANs always compare non-equal, we cannot use assertEqual. + # Nor can we use an identity test, as we don't guarantee anything + # about the object identity. + self.assertTrue(_nan_equal(num, nan)) + self.assertIs(den, None) def test_sign(self): # Test sign is calculated correctly. @@ -718,25 +794,181 @@ class DecimalToRatioTest(unittest.TestCase): self.assertEqual(t, (147000, 1)) -class CheckTypeTest(unittest.TestCase): - # Test _check_type private function. +class IsFiniteTest(unittest.TestCase): + # Test _isfinite private function. + + def test_finite(self): + # Test that finite numbers are recognised as finite. + for x in (5, Fraction(1, 3), 2.5, Decimal("5.5")): + self.assertTrue(statistics._isfinite(x)) - def test_allowed(self): - # Test that a type which should be allowed is allowed. - allowed = set([int, float]) - statistics._check_type(int, allowed) - statistics._check_type(float, allowed) + def test_infinity(self): + # Test that INFs are not recognised as finite. + for x in (float("inf"), Decimal("inf")): + self.assertFalse(statistics._isfinite(x)) + + def test_nan(self): + # Test that NANs are not recognised as finite. + for x in (float("nan"), Decimal("NAN"), Decimal("sNAN")): + self.assertFalse(statistics._isfinite(x)) + + +class CoerceTest(unittest.TestCase): + # Test that private function _coerce correctly deals with types. + + # The coercion rules are currently an implementation detail, although at + # some point that should change. The tests and comments here define the + # correct implementation. + + # Pre-conditions of _coerce: + # + # - The first time _sum calls _coerce, the + # - coerce(T, S) will never be called with bool as the first argument; + # this is a pre-condition, guarded with an assertion. + + # + # - coerce(T, T) will always return T; we assume T is a valid numeric + # type. Violate this assumption at your own risk. + # + # - Apart from as above, bool is treated as if it were actually int. + # + # - coerce(int, X) and coerce(X, int) return X. + # - + def test_bool(self): + # bool is somewhat special, due to the pre-condition that it is + # never given as the first argument to _coerce, and that it cannot + # be subclassed. So we test it specially. + for T in (int, float, Fraction, Decimal): + self.assertIs(statistics._coerce(T, bool), T) + class MyClass(T): pass + self.assertIs(statistics._coerce(MyClass, bool), MyClass) + + def assertCoerceTo(self, A, B): + """Assert that type A coerces to B.""" + self.assertIs(statistics._coerce(A, B), B) + self.assertIs(statistics._coerce(B, A), B) + + def check_coerce_to(self, A, B): + """Checks that type A coerces to B, including subclasses.""" + # Assert that type A is coerced to B. + self.assertCoerceTo(A, B) + # Subclasses of A are also coerced to B. + class SubclassOfA(A): pass + self.assertCoerceTo(SubclassOfA, B) + # A, and subclasses of A, are coerced to subclasses of B. + class SubclassOfB(B): pass + self.assertCoerceTo(A, SubclassOfB) + self.assertCoerceTo(SubclassOfA, SubclassOfB) + + def assertCoerceRaises(self, A, B): + """Assert that coercing A to B, or vice versa, raises TypeError.""" + self.assertRaises(TypeError, statistics._coerce, (A, B)) + self.assertRaises(TypeError, statistics._coerce, (B, A)) + + def check_type_coercions(self, T): + """Check that type T coerces correctly with subclasses of itself.""" + assert T is not bool + # Coercing a type with itself returns the same type. + self.assertIs(statistics._coerce(T, T), T) + # Coercing a type with a subclass of itself returns the subclass. + class U(T): pass + class V(T): pass + class W(U): pass + for typ in (U, V, W): + self.assertCoerceTo(T, typ) + self.assertCoerceTo(U, W) + # Coercing two subclasses that aren't parent/child is an error. + self.assertCoerceRaises(U, V) + self.assertCoerceRaises(V, W) - def test_not_allowed(self): - # Test that a type which should not be allowed raises. - allowed = set([int, float]) - self.assertRaises(TypeError, statistics._check_type, Decimal, allowed) + def test_int(self): + # Check that int coerces correctly. + self.check_type_coercions(int) + for typ in (float, Fraction, Decimal): + self.check_coerce_to(int, typ) - def test_add_to_allowed(self): - # Test that a second type will be added to the allowed set. - allowed = set([int]) - statistics._check_type(float, allowed) - self.assertEqual(allowed, set([int, float])) + def test_fraction(self): + # Check that Fraction coerces correctly. + self.check_type_coercions(Fraction) + self.check_coerce_to(Fraction, float) + + def test_decimal(self): + # Check that Decimal coerces correctly. + self.check_type_coercions(Decimal) + + def test_float(self): + # Check that float coerces correctly. + self.check_type_coercions(float) + + def test_non_numeric_types(self): + for bad_type in (str, list, type(None), tuple, dict): + for good_type in (int, float, Fraction, Decimal): + self.assertCoerceRaises(good_type, bad_type) + + def test_incompatible_types(self): + # Test that incompatible types raise. + for T in (float, Fraction): + class MySubclass(T): pass + self.assertCoerceRaises(T, Decimal) + self.assertCoerceRaises(MySubclass, Decimal) + + +class ConvertTest(unittest.TestCase): + # Test private _convert function. + + def check_exact_equal(self, x, y): + """Check that x equals y, and has the same type as well.""" + self.assertEqual(x, y) + self.assertIs(type(x), type(y)) + + def test_int(self): + # Test conversions to int. + x = statistics._convert(Fraction(71), int) + self.check_exact_equal(x, 71) + class MyInt(int): pass + x = statistics._convert(Fraction(17), MyInt) + self.check_exact_equal(x, MyInt(17)) + + def test_fraction(self): + # Test conversions to Fraction. + x = statistics._convert(Fraction(95, 99), Fraction) + self.check_exact_equal(x, Fraction(95, 99)) + class MyFraction(Fraction): + def __truediv__(self, other): + return self.__class__(super().__truediv__(other)) + x = statistics._convert(Fraction(71, 13), MyFraction) + self.check_exact_equal(x, MyFraction(71, 13)) + + def test_float(self): + # Test conversions to float. + x = statistics._convert(Fraction(-1, 2), float) + self.check_exact_equal(x, -0.5) + class MyFloat(float): + def __truediv__(self, other): + return self.__class__(super().__truediv__(other)) + x = statistics._convert(Fraction(9, 8), MyFloat) + self.check_exact_equal(x, MyFloat(1.125)) + + def test_decimal(self): + # Test conversions to Decimal. + x = statistics._convert(Fraction(1, 40), Decimal) + self.check_exact_equal(x, Decimal("0.025")) + class MyDecimal(Decimal): + def __truediv__(self, other): + return self.__class__(super().__truediv__(other)) + x = statistics._convert(Fraction(-15, 16), MyDecimal) + self.check_exact_equal(x, MyDecimal("-0.9375")) + + def test_inf(self): + for INF in (float('inf'), Decimal('inf')): + for inf in (INF, -INF): + x = statistics._convert(inf, type(inf)) + self.check_exact_equal(x, inf) + + def test_nan(self): + for nan in (float('nan'), Decimal('NAN'), Decimal('sNAN')): + x = statistics._convert(nan, type(nan)) + self.assertTrue(_nan_equal(x, nan)) # === Tests for public functions === @@ -874,52 +1106,71 @@ class UnivariateTypeMixin: self.assertIs(type(result), kind) -class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin): +class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin): + # Common test cases for statistics._sum() function. + + # This test suite looks only at the numeric value returned by _sum, + # after conversion to the appropriate type. + def setUp(self): + def simplified_sum(*args): + T, value, n = statistics._sum(*args) + return statistics._coerce(value, T) + self.func = simplified_sum + + +class TestSum(NumericTestCase): # Test cases for statistics._sum() function. + # These tests look at the entire three value tuple returned by _sum. + def setUp(self): self.func = statistics._sum def test_empty_data(self): # Override test for empty data. for data in ([], (), iter([])): - self.assertEqual(self.func(data), 0) - self.assertEqual(self.func(data, 23), 23) - self.assertEqual(self.func(data, 2.3), 2.3) + self.assertEqual(self.func(data), (int, Fraction(0), 0)) + self.assertEqual(self.func(data, 23), (int, Fraction(23), 0)) + self.assertEqual(self.func(data, 2.3), (float, Fraction(2.3), 0)) def test_ints(self): - self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]), 60) - self.assertEqual(self.func([4, 2, 3, -8, 7], 1000), 1008) + self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]), + (int, Fraction(60), 8)) + self.assertEqual(self.func([4, 2, 3, -8, 7], 1000), + (int, Fraction(1008), 5)) def test_floats(self): - self.assertEqual(self.func([0.25]*20), 5.0) - self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5), 3.125) + self.assertEqual(self.func([0.25]*20), + (float, Fraction(5.0), 20)) + self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5), + (float, Fraction(3.125), 4)) def test_fractions(self): - F = Fraction - self.assertEqual(self.func([Fraction(1, 1000)]*500), Fraction(1, 2)) + self.assertEqual(self.func([Fraction(1, 1000)]*500), + (Fraction, Fraction(1, 2), 500)) def test_decimals(self): D = Decimal data = [D("0.001"), D("5.246"), D("1.702"), D("-0.025"), D("3.974"), D("2.328"), D("4.617"), D("2.843"), ] - self.assertEqual(self.func(data), Decimal("20.686")) + self.assertEqual(self.func(data), + (Decimal, Decimal("20.686"), 8)) def test_compare_with_math_fsum(self): # Compare with the math.fsum function. # Ideally we ought to get the exact same result, but sometimes # we differ by a very slight amount :-( data = [random.uniform(-100, 1000) for _ in range(1000)] - self.assertApproxEqual(self.func(data), math.fsum(data), rel=2e-16) + self.assertApproxEqual(float(self.func(data)[1]), math.fsum(data), rel=2e-16) def test_start_argument(self): # Test that the optional start argument works correctly. data = [random.uniform(1, 1000) for _ in range(100)] - t = self.func(data) - self.assertEqual(t+42, self.func(data, 42)) - self.assertEqual(t-23, self.func(data, -23)) - self.assertEqual(t+1e20, self.func(data, 1e20)) + t = self.func(data)[1] + self.assertEqual(t+42, self.func(data, 42)[1]) + self.assertEqual(t-23, self.func(data, -23)[1]) + self.assertEqual(t+Fraction(1e20), self.func(data, 1e20)[1]) def test_strings_fail(self): # Sum of strings should fail. @@ -934,7 +1185,7 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin): def test_mixed_sum(self): # Mixed input types are not (currently) allowed. # Check that mixed data types fail. - self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)]) + self.assertRaises(TypeError, self.func, [1, 2.0, Decimal(1)]) # And so does mixed start argument. self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1)) @@ -942,11 +1193,14 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin): class SumTortureTest(NumericTestCase): def test_torture(self): # Tim Peters' torture test for sum, and variants of same. - self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000), 20000.0) - self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000), 20000.0) - self.assertApproxEqual( - statistics._sum([1e-100, 1, 1e-100, -1]*10000), 2.0e-96, rel=5e-16 - ) + self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000), + (float, Fraction(20000.0), 40000)) + self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000), + (float, Fraction(20000.0), 40000)) + T, num, count = statistics._sum([1e-100, 1, 1e-100, -1]*10000) + self.assertIs(T, float) + self.assertEqual(count, 40000) + self.assertApproxEqual(float(num), 2.0e-96, rel=5e-16) class SumSpecialValues(NumericTestCase): @@ -955,7 +1209,7 @@ class SumSpecialValues(NumericTestCase): def test_nan(self): for type_ in (float, Decimal): nan = type_('nan') - result = statistics._sum([1, nan, 2]) + result = statistics._sum([1, nan, 2])[1] self.assertIs(type(result), type_) self.assertTrue(math.isnan(result)) @@ -968,10 +1222,10 @@ class SumSpecialValues(NumericTestCase): def do_test_inf(self, inf): # Adding a single infinity gives infinity. - result = statistics._sum([1, 2, inf, 3]) + result = statistics._sum([1, 2, inf, 3])[1] self.check_infinity(result, inf) # Adding two infinities of the same sign also gives infinity. - result = statistics._sum([1, 2, inf, 3, inf, 4]) + result = statistics._sum([1, 2, inf, 3, inf, 4])[1] self.check_infinity(result, inf) def test_float_inf(self): @@ -987,7 +1241,7 @@ class SumSpecialValues(NumericTestCase): def test_float_mismatched_infs(self): # Test that adding two infinities of opposite sign gives a NAN. inf = float('inf') - result = statistics._sum([1, 2, inf, 3, -inf, 4]) + result = statistics._sum([1, 2, inf, 3, -inf, 4])[1] self.assertTrue(math.isnan(result)) def test_decimal_extendedcontext_mismatched_infs_to_nan(self): @@ -995,7 +1249,7 @@ class SumSpecialValues(NumericTestCase): inf = Decimal('inf') data = [1, 2, inf, 3, -inf, 4] with decimal.localcontext(decimal.ExtendedContext): - self.assertTrue(math.isnan(statistics._sum(data))) + self.assertTrue(math.isnan(statistics._sum(data)[1])) def test_decimal_basiccontext_mismatched_infs_to_nan(self): # Test adding Decimal INFs with opposite sign raises InvalidOperation. @@ -1111,6 +1365,19 @@ class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin): d = Decimal('1e4') self.assertEqual(statistics.mean([d]), d) + def test_regression_25177(self): + # Regression test for issue 25177. + # Ensure very big and very small floats don't overflow. + # See http://bugs.python.org/issue25177. + self.assertEqual(statistics.mean( + [8.988465674311579e+307, 8.98846567431158e+307]), + 8.98846567431158e+307) + big = 8.98846567431158e+307 + tiny = 5e-324 + for n in (2, 3, 5, 200): + self.assertEqual(statistics.mean([big]*n), big) + self.assertEqual(statistics.mean([tiny]*n), tiny) + class TestMedian(NumericTestCase, AverageMixin): # Common tests for median and all median.* functions. diff --git a/Misc/NEWS b/Misc/NEWS index c7a13948a3..08efa17ea5 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -113,6 +113,10 @@ Core and Builtins Library ------- +- Issue #25177: Fixed problem with the mean of very small and very large + numbers. As a side effect, statistics.mean and statistics.variance should + be significantly faster. + - Issue #25718: Fixed copying object with state with boolean value is false. - Issue #10131: Fixed deep copying of minidom documents. Based on patch -- 2.40.0