random.py revision ddb39e799d65748c5ea42c344170befc90af9e64
1e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko"""Random variable generators. 2e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 3e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko integers 4e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko -------- 5e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko uniform within range 6e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 7e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko sequences 8e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko --------- 9e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko pick random element 10e19008350e91367c3d18410dad5cedf369ea3258Chih-Hung Hsieh pick random sample 11e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko generate random permutation 12e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 13e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko distributions on the real line: 14e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko ------------------------------ 15e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko uniform 16e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko triangular 17822b710a714c342dda0087f594c8ababa6630f44Okan Arikan normal (Gaussian) 18822b710a714c342dda0087f594c8ababa6630f44Okan Arikan lognormal 19e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko negative exponential 20e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko gamma 21e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko beta 22e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko pareto 23822b710a714c342dda0087f594c8ababa6630f44Okan Arikan Weibull 24e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 25e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko distributions on the circle (angles 0 to 2pi) 26e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko --------------------------------------------- 27e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko circular uniform 28e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko von Mises 29e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 30e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex VakulenkoGeneral notes on the underlying Mersenne Twister core generator: 31e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 32e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko* The period is 2**19937-1. 33e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko* It is one of the most extensively tested generators in existence. 34e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko* Without a direct way to compute N steps forward, the semantics of 35e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko jumpahead(n) are weakened to simply jump to another distant state and rely 36e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko on the large period to avoid overlapping sequences. 37e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko* The random() method is implemented in C, executes in a single Python step, 38e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko and is, therefore, threadsafe. 39e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 40e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko""" 41e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 42e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom __future__ import division 43e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom warnings import warn as _warn 44e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType 45e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil 46e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin 47e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom os import urandom as _urandom 48e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkofrom binascii import hexlify as _hexlify 49e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkoimport hashlib as _hashlib 50e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 51e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko__all__ = ["Random","seed","random","uniform","randint","choice","sample", 52e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko "randrange","shuffle","normalvariate","lognormvariate", 53e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko "expovariate","vonmisesvariate","gammavariate","triangular", 54e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko "gauss","betavariate","paretovariate","weibullvariate", 55e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko "getstate","setstate","jumpahead", "WichmannHill", "getrandbits", 56e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko "SystemRandom"] 57e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 5878ac0c5c5fea3f6cd2d3ddc9e76d54b2267f118fLuke SongNV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) 5978ac0c5c5fea3f6cd2d3ddc9e76d54b2267f118fLuke SongTWOPI = 2.0*_pi 60e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex VakulenkoLOG4 = _log(4.0) 61e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex VakulenkoSG_MAGICCONST = 1.0 + _log(4.5) 62e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex VakulenkoBPF = 53 # Number of bits in a float 63e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex VakulenkoRECIP_BPF = 2**-BPF 64e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 65e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 66e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko# Translated by Guido van Rossum from C source provided by 67e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko# Adrian Baddeley. Adapted by Raymond Hettinger for use with 68e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko# the Mersenne Twister and os.urandom() core generators. 69e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 70e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkoimport _random 71e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 72e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenkoclass Random(_random.Random): 73822b710a714c342dda0087f594c8ababa6630f44Okan Arikan """Random number generator base class used by bound module functions. 74e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 75e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko Used to instantiate instances of Random to get generators that don't 76e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko share state. Especially useful for multi-threaded programs, creating 77e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko a different instance of Random for each thread, and using the jumpahead() 78822b710a714c342dda0087f594c8ababa6630f44Okan Arikan method to ensure that the generated sequences seen by each thread don't 79e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko overlap. 80e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 81e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko Class Random can also be subclassed if you want to use a different basic 82e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko generator of your own devising: in that case, override the following 83e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko methods: random(), seed(), getstate(), setstate() and jumpahead(). 84e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko Optionally, implement a getrandbits() method so that randrange() can cover 85e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko arbitrarily large ranges. 86e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 87822b710a714c342dda0087f594c8ababa6630f44Okan Arikan """ 88822b710a714c342dda0087f594c8ababa6630f44Okan Arikan 89e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko VERSION = 3 # used by getstate/setstate 90e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 91822b710a714c342dda0087f594c8ababa6630f44Okan Arikan def __init__(self, x=None): 92e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko """Initialize an instance. 93e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 94e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko Optional argument x controls seeding, as for Random.seed(). 95e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko """ 96e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 97e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko self.seed(x) 98e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko self.gauss_next = None 99e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 100e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko def seed(self, a=None): 101822b710a714c342dda0087f594c8ababa6630f44Okan Arikan """Initialize internal state from hashable object. 102822b710a714c342dda0087f594c8ababa6630f44Okan Arikan 103e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko None or no argument seeds from current time or from an operating 104e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko system specific randomness source if available. 105e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 106e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko If a is not None or an int or long, hash(a) is used instead. 107e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko """ 108e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 109822b710a714c342dda0087f594c8ababa6630f44Okan Arikan if a is None: 110e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko try: 111e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko # Seed with enough bytes to span the 19937 bit 112e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko # state space for the Mersenne Twister 113e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko a = long(_hexlify(_urandom(2500)), 16) 114e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko except NotImplementedError: 115e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko import time 116e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko a = long(time.time() * 256) # use fractional seconds 117822b710a714c342dda0087f594c8ababa6630f44Okan Arikan 118e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko super(Random, self).seed(a) 119e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko self.gauss_next = None 120e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 121e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko def getstate(self): 1227944b2a7f835490de2bcde60de011feaad30e333Luke Song """Return internal state; can be passed to setstate() later.""" 123e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko return self.VERSION, super(Random, self).getstate(), self.gauss_next 124e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 125e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko def setstate(self, state): 126e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko """Restore internal state from object returned by getstate().""" 127e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko version = state[0] 128822b710a714c342dda0087f594c8ababa6630f44Okan Arikan if version == 3: 129e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko version, internalstate, self.gauss_next = state 130e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko super(Random, self).setstate(internalstate) 131e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko elif version == 2: 132e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko version, internalstate, self.gauss_next = state 133e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko # In version 2, the state was saved as signed ints, which causes 1347944b2a7f835490de2bcde60de011feaad30e333Luke Song # inconsistencies between 32/64-bit systems. The state is 135e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko # really unsigned 32-bit ints, so we convert negative ints from 136e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko # version 2 to positive longs for version 3. 137e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko try: 138e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko internalstate = tuple( long(x) % (2**32) for x in internalstate ) 139e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko except ValueError, e: 1407944b2a7f835490de2bcde60de011feaad30e333Luke Song raise TypeError, e 141e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko super(Random, self).setstate(internalstate) 142e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko else: 143e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko raise ValueError("state with version %s passed to " 144e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko "Random.setstate() of version %s" % 145e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko (version, self.VERSION)) 146e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 147e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko def jumpahead(self, n): 148e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko """Change the internal state to one that is likely far away 149e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko from the current state. This method will not be in Py3.x, 150e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko so it is better to simply reseed. 151822b710a714c342dda0087f594c8ababa6630f44Okan Arikan """ 152822b710a714c342dda0087f594c8ababa6630f44Okan Arikan # The super.jumpahead() method uses shuffling to change state, 153e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko # so it needs a large and "interesting" n to work with. Here, 1547944b2a7f835490de2bcde60de011feaad30e333Luke Song # we use hashing to create a large n for the shuffle. 1557944b2a7f835490de2bcde60de011feaad30e333Luke Song s = repr(n) + repr(self.getstate()) 1567944b2a7f835490de2bcde60de011feaad30e333Luke Song n = int(_hashlib.new('sha512', s).hexdigest(), 16) 1577944b2a7f835490de2bcde60de011feaad30e333Luke Song super(Random, self).jumpahead(n) 1587944b2a7f835490de2bcde60de011feaad30e333Luke Song 1597944b2a7f835490de2bcde60de011feaad30e333Luke Song## ---- Methods below this point do not need to be overridden when 1605096c652aa19a501ce28177076de89e58e15b4b3Marie White## ---- subclassing for the purpose of using a different core generator. 1615096c652aa19a501ce28177076de89e58e15b4b3Marie White 1625096c652aa19a501ce28177076de89e58e15b4b3Marie White## -------------------- pickle support ------------------- 1635096c652aa19a501ce28177076de89e58e15b4b3Marie White 1645096c652aa19a501ce28177076de89e58e15b4b3Marie White def __getstate__(self): # for pickle 1655096c652aa19a501ce28177076de89e58e15b4b3Marie White return self.getstate() 1665096c652aa19a501ce28177076de89e58e15b4b3Marie White 1675096c652aa19a501ce28177076de89e58e15b4b3Marie White def __setstate__(self, state): # for pickle 1685096c652aa19a501ce28177076de89e58e15b4b3Marie White self.setstate(state) 1695096c652aa19a501ce28177076de89e58e15b4b3Marie White 17045516aab4012fb8eec77b09f20d84d470e6aa8a6Marie White def __reduce__(self): 1715096c652aa19a501ce28177076de89e58e15b4b3Marie White return self.__class__, (), self.getstate() 172e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 173e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko## -------------------- integer methods ------------------- 174e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko 175e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko def randrange(self, start, stop=None, step=1, _int=int, _maxwidth=1L<<BPF): 176e4eec20f6263f4a42ae462456f60ea6c4518bb0aAlex Vakulenko """Choose a random item from range(start, stop[, step]). 177 178 This fixes the problem with randint() which includes the 179 endpoint; in Python this is usually not what you want. 180 181 """ 182 183 # This code is a bit messy to make it fast for the 184 # common case while still doing adequate error checking. 185 istart = _int(start) 186 if istart != start: 187 raise ValueError, "non-integer arg 1 for randrange()" 188 if stop is None: 189 if istart > 0: 190 if istart >= _maxwidth: 191 return self._randbelow(istart) 192 return _int(self.random() * istart) 193 raise ValueError, "empty range for randrange()" 194 195 # stop argument supplied. 196 istop = _int(stop) 197 if istop != stop: 198 raise ValueError, "non-integer stop for randrange()" 199 width = istop - istart 200 if step == 1 and width > 0: 201 # Note that 202 # int(istart + self.random()*width) 203 # instead would be incorrect. For example, consider istart 204 # = -2 and istop = 0. Then the guts would be in 205 # -2.0 to 0.0 exclusive on both ends (ignoring that random() 206 # might return 0.0), and because int() truncates toward 0, the 207 # final result would be -1 or 0 (instead of -2 or -1). 208 # istart + int(self.random()*width) 209 # would also be incorrect, for a subtler reason: the RHS 210 # can return a long, and then randrange() would also return 211 # a long, but we're supposed to return an int (for backward 212 # compatibility). 213 214 if width >= _maxwidth: 215 return _int(istart + self._randbelow(width)) 216 return _int(istart + _int(self.random()*width)) 217 if step == 1: 218 raise ValueError, "empty range for randrange() (%d,%d, %d)" % (istart, istop, width) 219 220 # Non-unit step argument supplied. 221 istep = _int(step) 222 if istep != step: 223 raise ValueError, "non-integer step for randrange()" 224 if istep > 0: 225 n = (width + istep - 1) // istep 226 elif istep < 0: 227 n = (width + istep + 1) // istep 228 else: 229 raise ValueError, "zero step for randrange()" 230 231 if n <= 0: 232 raise ValueError, "empty range for randrange()" 233 234 if n >= _maxwidth: 235 return istart + istep*self._randbelow(n) 236 return istart + istep*_int(self.random() * n) 237 238 def randint(self, a, b): 239 """Return random integer in range [a, b], including both end points. 240 """ 241 242 return self.randrange(a, b+1) 243 244 def _randbelow(self, n, _log=_log, _int=int, _maxwidth=1L<<BPF, 245 _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType): 246 """Return a random int in the range [0,n) 247 248 Handles the case where n has more bits than returned 249 by a single call to the underlying generator. 250 """ 251 252 try: 253 getrandbits = self.getrandbits 254 except AttributeError: 255 pass 256 else: 257 # Only call self.getrandbits if the original random() builtin method 258 # has not been overridden or if a new getrandbits() was supplied. 259 # This assures that the two methods correspond. 260 if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method: 261 k = _int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2) 262 r = getrandbits(k) 263 while r >= n: 264 r = getrandbits(k) 265 return r 266 if n >= _maxwidth: 267 _warn("Underlying random() generator does not supply \n" 268 "enough bits to choose from a population range this large") 269 return _int(self.random() * n) 270 271## -------------------- sequence methods ------------------- 272 273 def choice(self, seq): 274 """Choose a random element from a non-empty sequence.""" 275 return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty 276 277 def shuffle(self, x, random=None): 278 """x, random=random.random -> shuffle list x in place; return None. 279 280 Optional arg random is a 0-argument function returning a random 281 float in [0.0, 1.0); by default, the standard random.random. 282 283 """ 284 285 if random is None: 286 random = self.random 287 _int = int 288 for i in reversed(xrange(1, len(x))): 289 # pick an element in x[:i+1] with which to exchange x[i] 290 j = _int(random() * (i+1)) 291 x[i], x[j] = x[j], x[i] 292 293 def sample(self, population, k): 294 """Chooses k unique random elements from a population sequence. 295 296 Returns a new list containing elements from the population while 297 leaving the original population unchanged. The resulting list is 298 in selection order so that all sub-slices will also be valid random 299 samples. This allows raffle winners (the sample) to be partitioned 300 into grand prize and second place winners (the subslices). 301 302 Members of the population need not be hashable or unique. If the 303 population contains repeats, then each occurrence is a possible 304 selection in the sample. 305 306 To choose a sample in a range of integers, use xrange as an argument. 307 This is especially fast and space efficient for sampling from a 308 large population: sample(xrange(10000000), 60) 309 """ 310 311 # Sampling without replacement entails tracking either potential 312 # selections (the pool) in a list or previous selections in a set. 313 314 # When the number of selections is small compared to the 315 # population, then tracking selections is efficient, requiring 316 # only a small set and an occasional reselection. For 317 # a larger number of selections, the pool tracking method is 318 # preferred since the list takes less space than the 319 # set and it doesn't suffer from frequent reselections. 320 321 n = len(population) 322 if not 0 <= k <= n: 323 raise ValueError("sample larger than population") 324 random = self.random 325 _int = int 326 result = [None] * k 327 setsize = 21 # size of a small set minus size of an empty list 328 if k > 5: 329 setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets 330 if n <= setsize or hasattr(population, "keys"): 331 # An n-length list is smaller than a k-length set, or this is a 332 # mapping type so the other algorithm wouldn't work. 333 pool = list(population) 334 for i in xrange(k): # invariant: non-selected at [0,n-i) 335 j = _int(random() * (n-i)) 336 result[i] = pool[j] 337 pool[j] = pool[n-i-1] # move non-selected item into vacancy 338 else: 339 try: 340 selected = set() 341 selected_add = selected.add 342 for i in xrange(k): 343 j = _int(random() * n) 344 while j in selected: 345 j = _int(random() * n) 346 selected_add(j) 347 result[i] = population[j] 348 except (TypeError, KeyError): # handle (at least) sets 349 if isinstance(population, list): 350 raise 351 return self.sample(tuple(population), k) 352 return result 353 354## -------------------- real-valued distributions ------------------- 355 356## -------------------- uniform distribution ------------------- 357 358 def uniform(self, a, b): 359 "Get a random number in the range [a, b) or [a, b] depending on rounding." 360 return a + (b-a) * self.random() 361 362## -------------------- triangular -------------------- 363 364 def triangular(self, low=0.0, high=1.0, mode=None): 365 """Triangular distribution. 366 367 Continuous distribution bounded by given lower and upper limits, 368 and having a given mode value in-between. 369 370 http://en.wikipedia.org/wiki/Triangular_distribution 371 372 """ 373 u = self.random() 374 c = 0.5 if mode is None else (mode - low) / (high - low) 375 if u > c: 376 u = 1.0 - u 377 c = 1.0 - c 378 low, high = high, low 379 return low + (high - low) * (u * c) ** 0.5 380 381## -------------------- normal distribution -------------------- 382 383 def normalvariate(self, mu, sigma): 384 """Normal distribution. 385 386 mu is the mean, and sigma is the standard deviation. 387 388 """ 389 # mu = mean, sigma = standard deviation 390 391 # Uses Kinderman and Monahan method. Reference: Kinderman, 392 # A.J. and Monahan, J.F., "Computer generation of random 393 # variables using the ratio of uniform deviates", ACM Trans 394 # Math Software, 3, (1977), pp257-260. 395 396 random = self.random 397 while 1: 398 u1 = random() 399 u2 = 1.0 - random() 400 z = NV_MAGICCONST*(u1-0.5)/u2 401 zz = z*z/4.0 402 if zz <= -_log(u2): 403 break 404 return mu + z*sigma 405 406## -------------------- lognormal distribution -------------------- 407 408 def lognormvariate(self, mu, sigma): 409 """Log normal distribution. 410 411 If you take the natural logarithm of this distribution, you'll get a 412 normal distribution with mean mu and standard deviation sigma. 413 mu can have any value, and sigma must be greater than zero. 414 415 """ 416 return _exp(self.normalvariate(mu, sigma)) 417 418## -------------------- exponential distribution -------------------- 419 420 def expovariate(self, lambd): 421 """Exponential distribution. 422 423 lambd is 1.0 divided by the desired mean. It should be 424 nonzero. (The parameter would be called "lambda", but that is 425 a reserved word in Python.) Returned values range from 0 to 426 positive infinity if lambd is positive, and from negative 427 infinity to 0 if lambd is negative. 428 429 """ 430 # lambd: rate lambd = 1/mean 431 # ('lambda' is a Python reserved word) 432 433 # we use 1-random() instead of random() to preclude the 434 # possibility of taking the log of zero. 435 return -_log(1.0 - self.random())/lambd 436 437## -------------------- von Mises distribution -------------------- 438 439 def vonmisesvariate(self, mu, kappa): 440 """Circular data distribution. 441 442 mu is the mean angle, expressed in radians between 0 and 2*pi, and 443 kappa is the concentration parameter, which must be greater than or 444 equal to zero. If kappa is equal to zero, this distribution reduces 445 to a uniform random angle over the range 0 to 2*pi. 446 447 """ 448 # mu: mean angle (in radians between 0 and 2*pi) 449 # kappa: concentration parameter kappa (>= 0) 450 # if kappa = 0 generate uniform random angle 451 452 # Based upon an algorithm published in: Fisher, N.I., 453 # "Statistical Analysis of Circular Data", Cambridge 454 # University Press, 1993. 455 456 # Thanks to Magnus Kessler for a correction to the 457 # implementation of step 4. 458 459 random = self.random 460 if kappa <= 1e-6: 461 return TWOPI * random() 462 463 s = 0.5 / kappa 464 r = s + _sqrt(1.0 + s * s) 465 466 while 1: 467 u1 = random() 468 z = _cos(_pi * u1) 469 470 d = z / (r + z) 471 u2 = random() 472 if u2 < 1.0 - d * d or u2 <= (1.0 - d) * _exp(d): 473 break 474 475 q = 1.0 / r 476 f = (q + z) / (1.0 + q * z) 477 u3 = random() 478 if u3 > 0.5: 479 theta = (mu + _acos(f)) % TWOPI 480 else: 481 theta = (mu - _acos(f)) % TWOPI 482 483 return theta 484 485## -------------------- gamma distribution -------------------- 486 487 def gammavariate(self, alpha, beta): 488 """Gamma distribution. Not the gamma function! 489 490 Conditions on the parameters are alpha > 0 and beta > 0. 491 492 The probability distribution function is: 493 494 x ** (alpha - 1) * math.exp(-x / beta) 495 pdf(x) = -------------------------------------- 496 math.gamma(alpha) * beta ** alpha 497 498 """ 499 500 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 501 502 # Warning: a few older sources define the gamma distribution in terms 503 # of alpha > -1.0 504 if alpha <= 0.0 or beta <= 0.0: 505 raise ValueError, 'gammavariate: alpha and beta must be > 0.0' 506 507 random = self.random 508 if alpha > 1.0: 509 510 # Uses R.C.H. Cheng, "The generation of Gamma 511 # variables with non-integral shape parameters", 512 # Applied Statistics, (1977), 26, No. 1, p71-74 513 514 ainv = _sqrt(2.0 * alpha - 1.0) 515 bbb = alpha - LOG4 516 ccc = alpha + ainv 517 518 while 1: 519 u1 = random() 520 if not 1e-7 < u1 < .9999999: 521 continue 522 u2 = 1.0 - random() 523 v = _log(u1/(1.0-u1))/ainv 524 x = alpha*_exp(v) 525 z = u1*u1*u2 526 r = bbb+ccc*v-x 527 if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z): 528 return x * beta 529 530 elif alpha == 1.0: 531 # expovariate(1) 532 u = random() 533 while u <= 1e-7: 534 u = random() 535 return -_log(u) * beta 536 537 else: # alpha is between 0 and 1 (exclusive) 538 539 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle 540 541 while 1: 542 u = random() 543 b = (_e + alpha)/_e 544 p = b*u 545 if p <= 1.0: 546 x = p ** (1.0/alpha) 547 else: 548 x = -_log((b-p)/alpha) 549 u1 = random() 550 if p > 1.0: 551 if u1 <= x ** (alpha - 1.0): 552 break 553 elif u1 <= _exp(-x): 554 break 555 return x * beta 556 557## -------------------- Gauss (faster alternative) -------------------- 558 559 def gauss(self, mu, sigma): 560 """Gaussian distribution. 561 562 mu is the mean, and sigma is the standard deviation. This is 563 slightly faster than the normalvariate() function. 564 565 Not thread-safe without a lock around calls. 566 567 """ 568 569 # When x and y are two variables from [0, 1), uniformly 570 # distributed, then 571 # 572 # cos(2*pi*x)*sqrt(-2*log(1-y)) 573 # sin(2*pi*x)*sqrt(-2*log(1-y)) 574 # 575 # are two *independent* variables with normal distribution 576 # (mu = 0, sigma = 1). 577 # (Lambert Meertens) 578 # (corrected version; bug discovered by Mike Miller, fixed by LM) 579 580 # Multithreading note: When two threads call this function 581 # simultaneously, it is possible that they will receive the 582 # same return value. The window is very small though. To 583 # avoid this, you have to use a lock around all calls. (I 584 # didn't want to slow this down in the serial case by using a 585 # lock here.) 586 587 random = self.random 588 z = self.gauss_next 589 self.gauss_next = None 590 if z is None: 591 x2pi = random() * TWOPI 592 g2rad = _sqrt(-2.0 * _log(1.0 - random())) 593 z = _cos(x2pi) * g2rad 594 self.gauss_next = _sin(x2pi) * g2rad 595 596 return mu + z*sigma 597 598## -------------------- beta -------------------- 599## See 600## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html 601## for Ivan Frohne's insightful analysis of why the original implementation: 602## 603## def betavariate(self, alpha, beta): 604## # Discrete Event Simulation in C, pp 87-88. 605## 606## y = self.expovariate(alpha) 607## z = self.expovariate(1.0/beta) 608## return z/(y+z) 609## 610## was dead wrong, and how it probably got that way. 611 612 def betavariate(self, alpha, beta): 613 """Beta distribution. 614 615 Conditions on the parameters are alpha > 0 and beta > 0. 616 Returned values range between 0 and 1. 617 618 """ 619 620 # This version due to Janne Sinkkonen, and matches all the std 621 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution"). 622 y = self.gammavariate(alpha, 1.) 623 if y == 0: 624 return 0.0 625 else: 626 return y / (y + self.gammavariate(beta, 1.)) 627 628## -------------------- Pareto -------------------- 629 630 def paretovariate(self, alpha): 631 """Pareto distribution. alpha is the shape parameter.""" 632 # Jain, pg. 495 633 634 u = 1.0 - self.random() 635 return 1.0 / pow(u, 1.0/alpha) 636 637## -------------------- Weibull -------------------- 638 639 def weibullvariate(self, alpha, beta): 640 """Weibull distribution. 641 642 alpha is the scale parameter and beta is the shape parameter. 643 644 """ 645 # Jain, pg. 499; bug fix courtesy Bill Arms 646 647 u = 1.0 - self.random() 648 return alpha * pow(-_log(u), 1.0/beta) 649 650## -------------------- Wichmann-Hill ------------------- 651 652class WichmannHill(Random): 653 654 VERSION = 1 # used by getstate/setstate 655 656 def seed(self, a=None): 657 """Initialize internal state from hashable object. 658 659 None or no argument seeds from current time or from an operating 660 system specific randomness source if available. 661 662 If a is not None or an int or long, hash(a) is used instead. 663 664 If a is an int or long, a is used directly. Distinct values between 665 0 and 27814431486575L inclusive are guaranteed to yield distinct 666 internal states (this guarantee is specific to the default 667 Wichmann-Hill generator). 668 """ 669 670 if a is None: 671 try: 672 a = long(_hexlify(_urandom(16)), 16) 673 except NotImplementedError: 674 import time 675 a = long(time.time() * 256) # use fractional seconds 676 677 if not isinstance(a, (int, long)): 678 a = hash(a) 679 680 a, x = divmod(a, 30268) 681 a, y = divmod(a, 30306) 682 a, z = divmod(a, 30322) 683 self._seed = int(x)+1, int(y)+1, int(z)+1 684 685 self.gauss_next = None 686 687 def random(self): 688 """Get the next random number in the range [0.0, 1.0).""" 689 690 # Wichman-Hill random number generator. 691 # 692 # Wichmann, B. A. & Hill, I. D. (1982) 693 # Algorithm AS 183: 694 # An efficient and portable pseudo-random number generator 695 # Applied Statistics 31 (1982) 188-190 696 # 697 # see also: 698 # Correction to Algorithm AS 183 699 # Applied Statistics 33 (1984) 123 700 # 701 # McLeod, A. I. (1985) 702 # A remark on Algorithm AS 183 703 # Applied Statistics 34 (1985),198-200 704 705 # This part is thread-unsafe: 706 # BEGIN CRITICAL SECTION 707 x, y, z = self._seed 708 x = (171 * x) % 30269 709 y = (172 * y) % 30307 710 z = (170 * z) % 30323 711 self._seed = x, y, z 712 # END CRITICAL SECTION 713 714 # Note: on a platform using IEEE-754 double arithmetic, this can 715 # never return 0.0 (asserted by Tim; proof too long for a comment). 716 return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 717 718 def getstate(self): 719 """Return internal state; can be passed to setstate() later.""" 720 return self.VERSION, self._seed, self.gauss_next 721 722 def setstate(self, state): 723 """Restore internal state from object returned by getstate().""" 724 version = state[0] 725 if version == 1: 726 version, self._seed, self.gauss_next = state 727 else: 728 raise ValueError("state with version %s passed to " 729 "Random.setstate() of version %s" % 730 (version, self.VERSION)) 731 732 def jumpahead(self, n): 733 """Act as if n calls to random() were made, but quickly. 734 735 n is an int, greater than or equal to 0. 736 737 Example use: If you have 2 threads and know that each will 738 consume no more than a million random numbers, create two Random 739 objects r1 and r2, then do 740 r2.setstate(r1.getstate()) 741 r2.jumpahead(1000000) 742 Then r1 and r2 will use guaranteed-disjoint segments of the full 743 period. 744 """ 745 746 if not n >= 0: 747 raise ValueError("n must be >= 0") 748 x, y, z = self._seed 749 x = int(x * pow(171, n, 30269)) % 30269 750 y = int(y * pow(172, n, 30307)) % 30307 751 z = int(z * pow(170, n, 30323)) % 30323 752 self._seed = x, y, z 753 754 def __whseed(self, x=0, y=0, z=0): 755 """Set the Wichmann-Hill seed from (x, y, z). 756 757 These must be integers in the range [0, 256). 758 """ 759 760 if not type(x) == type(y) == type(z) == int: 761 raise TypeError('seeds must be integers') 762 if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): 763 raise ValueError('seeds must be in range(0, 256)') 764 if 0 == x == y == z: 765 # Initialize from current time 766 import time 767 t = long(time.time() * 256) 768 t = int((t&0xffffff) ^ (t>>24)) 769 t, x = divmod(t, 256) 770 t, y = divmod(t, 256) 771 t, z = divmod(t, 256) 772 # Zero is a poor seed, so substitute 1 773 self._seed = (x or 1, y or 1, z or 1) 774 775 self.gauss_next = None 776 777 def whseed(self, a=None): 778 """Seed from hashable object's hash code. 779 780 None or no argument seeds from current time. It is not guaranteed 781 that objects with distinct hash codes lead to distinct internal 782 states. 783 784 This is obsolete, provided for compatibility with the seed routine 785 used prior to Python 2.1. Use the .seed() method instead. 786 """ 787 788 if a is None: 789 self.__whseed() 790 return 791 a = hash(a) 792 a, x = divmod(a, 256) 793 a, y = divmod(a, 256) 794 a, z = divmod(a, 256) 795 x = (x + a) % 256 or 1 796 y = (y + a) % 256 or 1 797 z = (z + a) % 256 or 1 798 self.__whseed(x, y, z) 799 800## --------------- Operating System Random Source ------------------ 801 802class SystemRandom(Random): 803 """Alternate random number generator using sources provided 804 by the operating system (such as /dev/urandom on Unix or 805 CryptGenRandom on Windows). 806 807 Not available on all systems (see os.urandom() for details). 808 """ 809 810 def random(self): 811 """Get the next random number in the range [0.0, 1.0).""" 812 return (long(_hexlify(_urandom(7)), 16) >> 3) * RECIP_BPF 813 814 def getrandbits(self, k): 815 """getrandbits(k) -> x. Generates a long int with k random bits.""" 816 if k <= 0: 817 raise ValueError('number of bits must be greater than zero') 818 if k != int(k): 819 raise TypeError('number of bits should be an integer') 820 bytes = (k + 7) // 8 # bits / 8 and rounded up 821 x = long(_hexlify(_urandom(bytes)), 16) 822 return x >> (bytes * 8 - k) # trim excess bits 823 824 def _stub(self, *args, **kwds): 825 "Stub method. Not used for a system random number generator." 826 return None 827 seed = jumpahead = _stub 828 829 def _notimplemented(self, *args, **kwds): 830 "Method should not be called for a system random number generator." 831 raise NotImplementedError('System entropy source does not have state.') 832 getstate = setstate = _notimplemented 833 834## -------------------- test program -------------------- 835 836def _test_generator(n, func, args): 837 import time 838 print n, 'times', func.__name__ 839 total = 0.0 840 sqsum = 0.0 841 smallest = 1e10 842 largest = -1e10 843 t0 = time.time() 844 for i in range(n): 845 x = func(*args) 846 total += x 847 sqsum = sqsum + x*x 848 smallest = min(x, smallest) 849 largest = max(x, largest) 850 t1 = time.time() 851 print round(t1-t0, 3), 'sec,', 852 avg = total/n 853 stddev = _sqrt(sqsum/n - avg*avg) 854 print 'avg %g, stddev %g, min %g, max %g' % \ 855 (avg, stddev, smallest, largest) 856 857 858def _test(N=2000): 859 _test_generator(N, random, ()) 860 _test_generator(N, normalvariate, (0.0, 1.0)) 861 _test_generator(N, lognormvariate, (0.0, 1.0)) 862 _test_generator(N, vonmisesvariate, (0.0, 1.0)) 863 _test_generator(N, gammavariate, (0.01, 1.0)) 864 _test_generator(N, gammavariate, (0.1, 1.0)) 865 _test_generator(N, gammavariate, (0.1, 2.0)) 866 _test_generator(N, gammavariate, (0.5, 1.0)) 867 _test_generator(N, gammavariate, (0.9, 1.0)) 868 _test_generator(N, gammavariate, (1.0, 1.0)) 869 _test_generator(N, gammavariate, (2.0, 1.0)) 870 _test_generator(N, gammavariate, (20.0, 1.0)) 871 _test_generator(N, gammavariate, (200.0, 1.0)) 872 _test_generator(N, gauss, (0.0, 1.0)) 873 _test_generator(N, betavariate, (3.0, 3.0)) 874 _test_generator(N, triangular, (0.0, 1.0, 1.0/3.0)) 875 876# Create one instance, seeded from current time, and export its methods 877# as module-level functions. The functions share state across all uses 878#(both in the user's code and in the Python libraries), but that's fine 879# for most programs and is easier for the casual user than making them 880# instantiate their own Random() instance. 881 882_inst = Random() 883seed = _inst.seed 884random = _inst.random 885uniform = _inst.uniform 886triangular = _inst.triangular 887randint = _inst.randint 888choice = _inst.choice 889randrange = _inst.randrange 890sample = _inst.sample 891shuffle = _inst.shuffle 892normalvariate = _inst.normalvariate 893lognormvariate = _inst.lognormvariate 894expovariate = _inst.expovariate 895vonmisesvariate = _inst.vonmisesvariate 896gammavariate = _inst.gammavariate 897gauss = _inst.gauss 898betavariate = _inst.betavariate 899paretovariate = _inst.paretovariate 900weibullvariate = _inst.weibullvariate 901getstate = _inst.getstate 902setstate = _inst.setstate 903jumpahead = _inst.jumpahead 904getrandbits = _inst.getrandbits 905 906if __name__ == '__main__': 907 _test() 908