From 951cfc8e542a54a479b6f01bddcb46e764be7cda Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 10 May 2022 21:45:17 -0700 Subject: [PATCH] Fix inconsistent return type for statistics median_grouped() gh-92531 (GH-92533) (#92656) --- Lib/statistics.py | 27 +++++++++---------- Lib/test/test_statistics.py | 6 +++++ ...2-05-09-01-27-25.gh-issue-92531.vV7S_O.rst | 3 +++ 3 files changed, 22 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst diff --git a/Lib/statistics.py b/Lib/statistics.py index 54f4e132651..2d66b0522f1 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -611,7 +611,7 @@ def median_high(data): return data[n // 2] -def median_grouped(data, interval=1): +def median_grouped(data, interval=1.0): """Estimates the median for numeric data binned around the midpoints of consecutive, fixed-width intervals. @@ -650,35 +650,34 @@ def median_grouped(data, interval=1): by exact multiples of *interval*. This is essential for getting a correct result. The function does not check this precondition. + Inputs may be any numeric type that can be coerced to a float during + the interpolation step. + """ data = sorted(data) n = len(data) - if n == 0: + if not n: raise StatisticsError("no median for empty data") - elif n == 1: - return data[0] # Find the value at the midpoint. Remember this corresponds to the # midpoint of the class interval. x = data[n // 2] - # Generate a clear error message for non-numeric data - for obj in (x, interval): - if isinstance(obj, (str, bytes)): - raise TypeError(f'expected a number but got {obj!r}') - # Using O(log n) bisection, find where all the x values occur in the data. # All x will lie within data[i:j]. 
i = bisect_left(data, x) j = bisect_right(data, x, lo=i) + # Coerce to floats, raising a TypeError if not possible + try: + interval = float(interval) + x = float(x) + except ValueError: + raise TypeError(f'Value cannot be converted to a float') + # Interpolate the median using the formula found at: # https://www.cuemath.com/data/median-of-grouped-data/ - try: - L = x - interval / 2 # The lower limit of the median interval. - except TypeError: - # Coerce mixed types to float. - L = float(x) - float(interval) / 2 + L = x - interval / 2.0 # Lower limit of the median interval cf = i # Cumulative frequency of the preceding interval f = j - i # Number of elements in the median internal return L + interval * (n / 2 - cf) / f diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index ed6021d60bd..6de98241c29 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -1742,6 +1742,12 @@ def test_repeated_single_value(self): data = [x]*count self.assertEqual(self.func(data), float(x)) + def test_single_value(self): + # Override method from AverageMixin. + # Average of a single value is the value as a float. + for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')): + self.assertEqual(self.func([x]), float(x)) + def test_odd_fractions(self): # Test median_grouped works with an odd number of Fractions. F = Fraction diff --git a/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst new file mode 100644 index 00000000000..574fa6c4d97 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst @@ -0,0 +1,3 @@ +The statistics.median_grouped() function now always returns a float. +Formerly, it did not convert the input type for sequences of length +one.