granicus.if.org Git - python/commitdiff
bpo-27181: Add statistics.geometric_mean() (GH-12638)
author: Raymond Hettinger <rhettinger@users.noreply.github.com>
Sun, 7 Apr 2019 16:20:03 +0000 (09:20 -0700)
committer: GitHub <noreply@github.com>
Sun, 7 Apr 2019 16:20:03 +0000 (09:20 -0700)
Doc/library/statistics.rst
Doc/whatsnew/3.8.rst
Lib/statistics.py
Lib/test/test_statistics.py
Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst [new file with mode: 0644]

index 1d52d98b29971b9a7e58d607205d370cc3d651c7..8bb2bdf7b697deabd8e6850f3e11f983fc4e8e40 100644 (file)
@@ -40,6 +40,7 @@ or sample.
 =======================  ===============================================================
 :func:`mean`             Arithmetic mean ("average") of data.
 :func:`fmean`            Fast, floating point arithmetic mean.
+:func:`geometric_mean`   Geometric mean of data.
 :func:`harmonic_mean`    Harmonic mean of data.
 :func:`median`           Median (middle value) of data.
 :func:`median_low`       Low median of data.
@@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences.
    .. versionadded:: 3.8
 
 
+.. function:: geometric_mean(data)
+
+   Convert *data* to floats and compute the geometric mean.
+
+   Raises a :exc:`StatisticsError` if the input dataset is empty,
+   if it contains a zero, or if it contains a negative value.
+
+   No special efforts are made to achieve exact results.
+   (However, this may change in the future.)
+
+   .. doctest::
+
+      >>> round(geometric_mean([54, 24, 36]), 9)
+      36.0
+
+   .. versionadded:: 3.8
+
+
 .. function:: harmonic_mean(data)
 
    Return the harmonic mean of *data*, a sequence or iterator of
index ac20ee3aa57c45cf0ae82525b2ef740a0303afe1..4347b3ee4118954ed7d8df326a2682c500faf45d 100644 (file)
@@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of
 :func:`statistics.mean()`.  (Contributed by Raymond Hettinger and
 Steven D'Aprano in :issue:`35904`.)
 
+Added :func:`statistics.geometric_mean()`.
+(Contributed by Raymond Hettinger in :issue:`27181`.)
+
 Added :func:`statistics.multimode` that returns a list of the most
 common values. (Contributed by Raymond Hettinger in :issue:`35892`.)
 
index bd8a6f96381a72e9d401445131903cb9728e4dcb..262ad976b65cb208a52517fcc30e11314ed94c18 100644 (file)
@@ -11,13 +11,14 @@ Calculating averages
 Function            Description
 ==================  =============================================
 mean                Arithmetic mean (average) of data.
+geometric_mean      Geometric mean of data.
 harmonic_mean       Harmonic mean of data.
 median              Median (middle value) of data.
 median_low          Low median of data.
 median_high         High median of data.
 median_grouped      Median, or 50th percentile, of grouped data.
 mode                Mode (most common value) of data.
-multimode           List of modes (most common values of data)
+multimode           List of modes (most common values of data).
 ==================  =============================================
 
 Calculate the arithmetic mean ("the average") of data:
@@ -81,6 +82,7 @@ __all__ = [ 'StatisticsError', 'NormalDist',
             'pstdev', 'pvariance', 'stdev', 'variance',
             'median',  'median_low', 'median_high', 'median_grouped',
             'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
+            'geometric_mean',
           ]
 
 import math
@@ -328,6 +330,24 @@ def fmean(data):
     except ZeroDivisionError:
         raise StatisticsError('fmean requires at least one data point') from None
 
+def geometric_mean(data):
+    """Convert data to floats and compute the geometric mean.
+
+    Raises a StatisticsError if the input dataset is empty,
+    if it contains a zero, or if it contains a negative value.
+
+    No special efforts are made to achieve exact results.
+    (However, this may change in the future.)
+
+    >>> round(geometric_mean([54, 24, 36]), 9)
+    36.0
+    """
+    try:
+        return exp(fmean(map(log, data)))
+    except ValueError:
+        raise StatisticsError('geometric mean requires a non-empty dataset '
+                              'containing positive numbers') from None
+
 def harmonic_mean(data):
     """Return the harmonic mean of data.
 
index 7f7839de460039bbf150a473ba87ba811ac5170b..4d397eb1265d364d978093714bd18da901ffc566 100644 (file)
@@ -2038,6 +2038,94 @@ class TestStdev(VarianceStdevMixin, NumericTestCase):
         expected = math.sqrt(statistics.variance(data))
         self.assertEqual(self.func(data), expected)
 
+class TestGeometricMean(unittest.TestCase):
+
+    def test_basics(self):
+        geometric_mean = statistics.geometric_mean
+        self.assertAlmostEqual(geometric_mean([54, 24, 36]), 36.0)
+        self.assertAlmostEqual(geometric_mean([4.0, 9.0]), 6.0)
+        self.assertAlmostEqual(geometric_mean([17.625]), 17.625)
+
+        random.seed(86753095551212)
+        for rng in [
+                range(1, 100),
+                range(1, 1_000),
+                range(1, 10_000),
+                range(500, 10_000, 3),
+                range(10_000, 500, -3),
+                [12, 17, 13, 5, 120, 7],
+                [random.expovariate(50.0) for i in range(1_000)],
+                [random.lognormvariate(20.0, 3.0) for i in range(2_000)],
+                [random.triangular(2000, 3000, 2200) for i in range(3_000)],
+            ]:
+            gm_decimal = math.prod(map(Decimal, rng)) ** (Decimal(1) / len(rng))
+            gm_float = geometric_mean(rng)
+            self.assertTrue(math.isclose(gm_float, float(gm_decimal)))
+
+    def test_various_input_types(self):
+        geometric_mean = statistics.geometric_mean
+        D = Decimal
+        F = Fraction
+        # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
+        expected_mean = 4.18886
+        for data, kind in [
+            ([3.5, 4.0, 5.25], 'floats'),
+            ([D('3.5'), D('4.0'), D('5.25')], 'decimals'),
+            ([F(7, 2), F(4, 1), F(21, 4)], 'fractions'),
+            ([3.5, 4, F(21, 4)], 'mixed types'),
+            ((3.5, 4.0, 5.25), 'tuple'),
+            (iter([3.5, 4.0, 5.25]), 'iterator'),
+                ]:
+            actual_mean = geometric_mean(data)
+            self.assertIs(type(actual_mean), float, kind)
+            self.assertAlmostEqual(actual_mean, expected_mean, places=5)
+
+    def test_big_and_small(self):
+        geometric_mean = statistics.geometric_mean
+
+        # Avoid overflow to infinity
+        large = 2.0 ** 1000
+        big_gm = geometric_mean([54.0 * large, 24.0 * large, 36.0 * large])
+        self.assertTrue(math.isclose(big_gm, 36.0 * large))
+        self.assertFalse(math.isinf(big_gm))
+
+        # Avoid underflow to zero
+        small = 2.0 ** -1000
+        small_gm = geometric_mean([54.0 * small, 24.0 * small, 36.0 * small])
+        self.assertTrue(math.isclose(small_gm, 36.0 * small))
+        self.assertNotEqual(small_gm, 0.0)
+
+    def test_error_cases(self):
+        geometric_mean = statistics.geometric_mean
+        StatisticsError = statistics.StatisticsError
+        with self.assertRaises(StatisticsError):
+            geometric_mean([])                      # empty input
+        with self.assertRaises(StatisticsError):
+            geometric_mean([3.5, 0.0, 5.25])        # zero input
+        with self.assertRaises(StatisticsError):
+            geometric_mean([3.5, -4.0, 5.25])       # negative input
+        with self.assertRaises(StatisticsError):
+            geometric_mean(iter([]))                # empty iterator
+        with self.assertRaises(TypeError):
+            geometric_mean(None)                    # non-iterable input
+        with self.assertRaises(TypeError):
+            geometric_mean([10, None, 20])          # non-numeric input
+        with self.assertRaises(TypeError):
+            geometric_mean()                        # missing data argument
+        with self.assertRaises(TypeError):
+            geometric_mean([10, 20, 60], 70)        # too many arguments
+
+    def test_special_values(self):
+        # Rules for special values are inherited from math.fsum()
+        geometric_mean = statistics.geometric_mean
+        NaN = float('Nan')
+        Inf = float('Inf')
+        self.assertTrue(math.isnan(geometric_mean([10, NaN])), 'nan')
+        self.assertTrue(math.isnan(geometric_mean([NaN, Inf])), 'nan and infinity')
+        self.assertTrue(math.isinf(geometric_mean([10, Inf])), 'infinity')
+        with self.assertRaises(ValueError):
+            geometric_mean([Inf, -Inf])
+
 class TestNormalDist(unittest.TestCase):
 
     # General note on precision: The pdf(), cdf(), and overlap() methods
diff --git a/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst
new file mode 100644 (file)
index 0000000..3ce41c5
--- /dev/null
@@ -0,0 +1 @@
+Add statistics.geometric_mean().