mirror of https://github.com/python/cpython.git
Fix inconsistent return type for statistics median_grouped() gh-92531 (GH-92533) (#92656)
This commit is contained in:
parent
5197134c1c
commit
951cfc8e54
|
@ -611,7 +611,7 @@ def median_high(data):
|
|||
return data[n // 2]
|
||||
|
||||
|
||||
def median_grouped(data, interval=1):
|
||||
def median_grouped(data, interval=1.0):
|
||||
"""Estimates the median for numeric data binned around the midpoints
|
||||
of consecutive, fixed-width intervals.
|
||||
|
||||
|
@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
|
|||
by exact multiples of *interval*. This is essential for getting a
|
||||
correct result. The function does not check this precondition.
|
||||
|
||||
Inputs may be any numeric type that can be coerced to a float during
|
||||
the interpolation step.
|
||||
|
||||
"""
|
||||
data = sorted(data)
|
||||
n = len(data)
|
||||
if n == 0:
|
||||
if not n:
|
||||
raise StatisticsError("no median for empty data")
|
||||
elif n == 1:
|
||||
return data[0]
|
||||
|
||||
# Find the value at the midpoint. Remember this corresponds to the
|
||||
# midpoint of the class interval.
|
||||
x = data[n // 2]
|
||||
|
||||
# Generate a clear error message for non-numeric data
|
||||
for obj in (x, interval):
|
||||
if isinstance(obj, (str, bytes)):
|
||||
raise TypeError(f'expected a number but got {obj!r}')
|
||||
|
||||
# Using O(log n) bisection, find where all the x values occur in the data.
|
||||
# All x will lie within data[i:j].
|
||||
i = bisect_left(data, x)
|
||||
j = bisect_right(data, x, lo=i)
|
||||
|
||||
# Coerce to floats, raising a TypeError if not possible
|
||||
try:
|
||||
interval = float(interval)
|
||||
x = float(x)
|
||||
except ValueError:
|
||||
raise TypeError(f'Value cannot be converted to a float')
|
||||
|
||||
# Interpolate the median using the formula found at:
|
||||
# https://www.cuemath.com/data/median-of-grouped-data/
|
||||
try:
|
||||
L = x - interval / 2 # The lower limit of the median interval.
|
||||
except TypeError:
|
||||
# Coerce mixed types to float.
|
||||
L = float(x) - float(interval) / 2
|
||||
L = x - interval / 2.0 # Lower limit of the median interval
|
||||
cf = i # Cumulative frequency of the preceding interval
|
||||
f = j - i # Number of elements in the median interval
|
||||
return L + interval * (n / 2 - cf) / f
|
||||
|
|
|
@ -1742,6 +1742,12 @@ def test_repeated_single_value(self):
|
|||
data = [x]*count
|
||||
self.assertEqual(self.func(data), float(x))
|
||||
|
||||
def test_single_value(self):
|
||||
# Override method from AverageMixin.
|
||||
# Average of a single value is the value as a float.
|
||||
for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
|
||||
self.assertEqual(self.func([x]), float(x))
|
||||
|
||||
def test_odd_fractions(self):
|
||||
# Test median_grouped works with an odd number of Fractions.
|
||||
F = Fraction
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
The statistics.median_grouped() function now always returns a float.
|
||||
Formerly, it did not convert the input type for sequences of length
|
||||
one.
|
Loading…
Reference in New Issue