From 7fc633f5a56d9e672cd24133e2e1376347abac6c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 4 Dec 2018 00:13:38 -0800 Subject: [PATCH] Add comments regarding speed/space/entropy trade-offs (GH-10885) --- Lib/random.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Lib/random.py b/Lib/random.py index 4b51b6696bf..a7a86070c0a 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -333,6 +333,19 @@ def sample(self, population, k): # preferred since the list takes less space than the # set and it doesn't suffer from frequent reselections. + # The number of calls to _randbelow() is kept at or near k, the + # theoretical minimum. This is important because running time + # is dominated by _randbelow() and because it extracts the + # least entropy from the underlying random number generators. + + # Memory requirements are kept to the smaller of a k-length + # set or an n-length list. + + # There are other sampling algorithms that do not require + # auxiliary memory, but they were rejected because they made + # too many calls to _randbelow(), making them slower and + # causing them to eat more entropy than necessary. + if isinstance(population, _Set): population = tuple(population) if not isinstance(population, _Sequence):