bpo-35904: Add statistics.fmean() (GH-11892)

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Thu, 21 Feb 2019 23:06:29 +0000 (15:06 -0800)

committer GitHub <noreply@github.com>

Thu, 21 Feb 2019 23:06:29 +0000 (15:06 -0800)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Thu, 21 Feb 2019 23:06:29 +0000 (15:06 -0800)
committer GitHub <noreply@github.com>
Thu, 21 Feb 2019 23:06:29 +0000 (15:06 -0800)
diff --git a/Doc/library/random.rst b/Doc/library/random.rst

index 7d051e185429d52cbf4d9d67a951df3560ce5f3f..79a7bddad4979284340b205da7ebea5e29f94b2c 100644 (file)
--- a/Doc/library/random.rst
+++ b/Doc/library/random.rst
@@ -404,7 +404,7 @@ with replacement to estimate a confidence interval for the mean of a sample of
  size five::
  
     # http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm
-   from statistics import mean
+   from statistics import fmean as mean
     from random import choices
  
     data = 1, 2, 4, 4, 10
@@ -419,7 +419,7 @@ to determine the statistical significance or `p-value
  between the effects of a drug versus a placebo::
  
      # Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson
-    from statistics import mean
+    from statistics import fmean as mean
      from random import shuffle
  
      drug = [54, 73, 53, 70, 73, 68, 52, 65, 65]
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst

index 26bb592b23812b8d36447d72820122ff1f18556d..20a2c1cb13e115e7e2f23fdbf7b7f45ebc06091d 100644 (file)
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -39,6 +39,7 @@ or sample.
  
  =======================  =============================================
  :func:`mean`             Arithmetic mean ("average") of data.
+:func:`fmean`            Fast, floating point arithmetic mean.
  :func:`harmonic_mean`    Harmonic mean of data.
  :func:`median`           Median (middle value) of data.
  :func:`median_low`       Low median of data.
@@ -111,6 +112,23 @@ However, for reading convenience, most of the examples show sorted sequences.
        ``mean(data)`` is equivalent to calculating the true population mean μ.
  
  
+.. function:: fmean(data)
+
+   Convert *data* to floats and compute the arithmetic mean.
+
+   This runs faster than the :func:`mean` function and it always returns a
+   :class:`float`.  The result is highly accurate but not as perfect as
+   :func:`mean`.  If the input dataset is empty, it raises a
+   :exc:`StatisticsError`.
+
+   .. doctest::
+
+      >>> fmean([3.5, 4.0, 5.25])
+      4.25
+
+   .. versionadded:: 3.8
+
+
  .. function:: harmonic_mean(data)
  
     Return the harmonic mean of *data*, a sequence or iterator of
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst

index 2f759f3454ea3eecfd3016e2ee895ed933c9954b..bf7300db094589f705b61a86fbbf34966098de2a 100644 (file)
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -254,6 +254,15 @@ Added :attr:`SSLContext.post_handshake_auth` to enable and
  post-handshake authentication.
  (Contributed by Christian Heimes in :issue:`34670`.)
  
+
+statistics
+----------
+
+Added :func:`statistics.fmean` as a faster, floating point variant of
+:func:`statistics.mean`.  (Contributed by Raymond Hettinger and
+Steven D'Aprano in :issue:`35904`.)
+
+
  tokenize
  --------
  
diff --git a/Lib/statistics.py b/Lib/statistics.py

index 47c2bb41cbfc970f63117f2a75f4d83cd46f3749..8ecb906d8699513e433a53ad5f12b586d71b9aca 100644 (file)
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -79,7 +79,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
  __all__ = [ 'StatisticsError',
              'pstdev', 'pvariance', 'stdev', 'variance',
              'median',  'median_low', 'median_high', 'median_grouped',
-            'mean', 'mode', 'harmonic_mean',
+            'mean', 'mode', 'harmonic_mean', 'fmean',
            ]
  
  import collections
@@ -312,6 +312,33 @@ def mean(data):
      assert count == n
      return _convert(total/n, T)
  
+def fmean(data):
+    """ Convert data to floats and compute the arithmetic mean.
+
+    This runs faster than the mean() function and it always returns a float.
+    The result is highly accurate but not as perfect as mean().
+    If the input dataset is empty, it raises a StatisticsError.
+
+    >>> fmean([3.5, 4.0, 5.25])
+    4.25
+
+    """
+    try:
+        n = len(data)
+    except TypeError:
+        # Handle iterators that do not define __len__().
+        n = 0
+        def count(x):
+            nonlocal n
+            n += 1
+            return x
+        total = math.fsum(map(count, data))
+    else:
+        total = math.fsum(data)
+    try:
+        return total / n
+    except ZeroDivisionError:
+        raise StatisticsError('fmean requires at least one data point') from None
  
  def harmonic_mean(data):
      """Return the harmonic mean of data.
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py

index b577433e3f119057b5404eaaf00322c3f5bd7b1a..e35144677ad53eba0388aca811bd7c4e53b1c553 100644 (file)
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1810,6 +1810,51 @@ class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
          # counts, this should raise.
          self.assertRaises(statistics.StatisticsError, self.func, data)
  
+class TestFMean(unittest.TestCase):
+
+    def test_basics(self):
+        fmean = statistics.fmean
+        D = Decimal
+        F = Fraction
+        for data, expected_mean, kind in [
+            ([3.5, 4.0, 5.25], 4.25, 'floats'),
+            ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
+            ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
+            ([True, False, True, True, False], 0.60, 'booleans'),
+            ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
+            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
+            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
+                ]:
+            actual_mean = fmean(data)
+            self.assertIs(type(actual_mean), float, kind)
+            self.assertEqual(actual_mean, expected_mean, kind)
+
+    def test_error_cases(self):
+        fmean = statistics.fmean
+        StatisticsError = statistics.StatisticsError
+        with self.assertRaises(StatisticsError):
+            fmean([])                               # empty input
+        with self.assertRaises(StatisticsError):
+            fmean(iter([]))                         # empty iterator
+        with self.assertRaises(TypeError):
+            fmean(None)                             # non-iterable input
+        with self.assertRaises(TypeError):
+            fmean([10, None, 20])                   # non-numeric input
+        with self.assertRaises(TypeError):
+            fmean()                                 # missing data argument
+        with self.assertRaises(TypeError):
+            fmean([10, 20, 60], 70)                 # too many arguments
+
+    def test_special_values(self):
+        # Rules for special values are inherited from math.fsum()
+        fmean = statistics.fmean
+        NaN = float('Nan')
+        Inf = float('Inf')
+        self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
+        self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
+        self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
+        with self.assertRaises(ValueError):
+            fmean([Inf, -Inf])
  
  
  # === Tests for variances and standard deviations ===
diff --git a/Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst b/Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst

new file mode 100644 (file)

index 0000000..c40c861
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst
@@ -0,0 +1,2 @@
+Added statistics.fmean() as a faster, floating point variant of the existing
+mean() function.
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Thu, 21 Feb 2019 23:06:29 +0000 (15:06 -0800)
committer	GitHub <noreply@github.com>
	Thu, 21 Feb 2019 23:06:29 +0000 (15:06 -0800)
Doc/library/random.rst		patch \| blob \| history
Doc/library/statistics.rst		patch \| blob \| history
Doc/whatsnew/3.8.rst		patch \| blob \| history
Lib/statistics.py		patch \| blob \| history
Lib/test/test_statistics.py		patch \| blob \| history
Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst	[new file with mode: 0644]	patch \| blob