random.py revision 8ac1495a6a1d18111a626cec0c7f2eb67df3edb3
1"""Random variable generators. 2 3 integers 4 -------- 5 uniform within range 6 7 sequences 8 --------- 9 pick random element 10 generate random permutation 11 12 distributions on the real line: 13 ------------------------------ 14 uniform 15 normal (Gaussian) 16 lognormal 17 negative exponential 18 gamma 19 beta 20 21 distributions on the circle (angles 0 to 2pi) 22 --------------------------------------------- 23 circular uniform 24 von Mises 25 26Translated from anonymously contributed C/C++ source. 27 28Multi-threading note: the random number generator used here is not thread- 29safe; it is possible that two calls return the same random value. However, 30you can instantiate a different instance of Random() in each thread to get 31generators that don't share state, then use .setstate() and .jumpahead() to 32move the generators to disjoint segments of the full period. For example, 33 34def create_generators(num, delta, firstseed=None): 35 ""\"Return list of num distinct generators. 36 Each generator has its own unique segment of delta elements from 37 Random.random()'s full period. 38 Seed the first generator with optional arg firstseed (default is 39 None, to seed from current time). 40 ""\" 41 42 from random import Random 43 g = Random(firstseed) 44 result = [g] 45 for i in range(num - 1): 46 laststate = g.getstate() 47 g = Random() 48 g.setstate(laststate) 49 g.jumpahead(delta) 50 result.append(g) 51 return result 52 53gens = create_generators(10, 1000000) 54 55That creates 10 distinct generators, which can be passed out to 10 distinct 56threads. The generators don't share state so can be called safely in 57parallel. So long as no thread calls its g.random() more than a million 58times (the second argument to create_generators), the sequences seen by 59each thread will not overlap. 60 61The period of the underlying Wichmann-Hill generator is 6,953,607,871,644, 62and that limits how far this technique can be pushed. 63 64Just for fun, note that since we know the period, .jumpahead() can also be 65used to "move backward in time": 66 67>>> g = Random(42) # arbitrary 68>>> g.random() 690.25420336316883324 70>>> g.jumpahead(6953607871644L - 1) # move *back* one 71>>> g.random() 720.25420336316883324 73""" 74# XXX The docstring sucks. 75 76from math import log as _log, exp as _exp, pi as _pi, e as _e 77from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin 78 79__all__ = ["Random","seed","random","uniform","randint","choice", 80 "randrange","shuffle","normalvariate","lognormvariate", 81 "cunifvariate","expovariate","vonmisesvariate","gammavariate", 82 "stdgamma","gauss","betavariate","paretovariate","weibullvariate", 83 "getstate","setstate","jumpahead","whseed"] 84 85def _verify(name, computed, expected): 86 if abs(computed - expected) > 1e-7: 87 raise ValueError( 88 "computed value for %s deviates too much " 89 "(computed %g, expected %g)" % (name, computed, expected)) 90 91NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) 92_verify('NV_MAGICCONST', NV_MAGICCONST, 1.71552776992141) 93 94TWOPI = 2.0*_pi 95_verify('TWOPI', TWOPI, 6.28318530718) 96 97LOG4 = _log(4.0) 98_verify('LOG4', LOG4, 1.38629436111989) 99 100SG_MAGICCONST = 1.0 + _log(4.5) 101_verify('SG_MAGICCONST', SG_MAGICCONST, 2.50407739677627) 102 103del _verify 104 105# Translated by Guido van Rossum from C source provided by 106# Adrian Baddeley. 107 108class Random: 109 110 VERSION = 1 # used by getstate/setstate 111 112 def __init__(self, x=None): 113 """Initialize an instance. 114 115 Optional argument x controls seeding, as for Random.seed(). 116 """ 117 118 self.seed(x) 119 120## -------------------- core generator ------------------- 121 122 # Specific to Wichmann-Hill generator. Subclasses wishing to use a 123 # different core generator should override the seed(), random(), 124 # getstate(), setstate() and jumpahead() methods. 125 126 def seed(self, a=None): 127 """Initialize internal state from hashable object. 128 129 None or no argument seeds from current time. 130 131 If a is not None or an int or long, hash(a) is used instead. 132 133 If a is an int or long, a is used directly. Distinct values between 134 0 and 27814431486575L inclusive are guaranteed to yield distinct 135 internal states (this guarantee is specific to the default 136 Wichmann-Hill generator). 137 """ 138 139 if a is None: 140 # Initialize from current time 141 import time 142 a = long(time.time() * 256) 143 144 if type(a) not in (type(3), type(3L)): 145 a = hash(a) 146 147 a, x = divmod(a, 30268) 148 a, y = divmod(a, 30306) 149 a, z = divmod(a, 30322) 150 self._seed = int(x)+1, int(y)+1, int(z)+1 151 152 self.gauss_next = None 153 154 def random(self): 155 """Get the next random number in the range [0.0, 1.0).""" 156 157 # Wichman-Hill random number generator. 158 # 159 # Wichmann, B. A. & Hill, I. D. (1982) 160 # Algorithm AS 183: 161 # An efficient and portable pseudo-random number generator 162 # Applied Statistics 31 (1982) 188-190 163 # 164 # see also: 165 # Correction to Algorithm AS 183 166 # Applied Statistics 33 (1984) 123 167 # 168 # McLeod, A. I. (1985) 169 # A remark on Algorithm AS 183 170 # Applied Statistics 34 (1985),198-200 171 172 # This part is thread-unsafe: 173 # BEGIN CRITICAL SECTION 174 x, y, z = self._seed 175 x = (171 * x) % 30269 176 y = (172 * y) % 30307 177 z = (170 * z) % 30323 178 self._seed = x, y, z 179 # END CRITICAL SECTION 180 181 # Note: on a platform using IEEE-754 double arithmetic, this can 182 # never return 0.0 (asserted by Tim; proof too long for a comment). 183 return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 184 185 def getstate(self): 186 """Return internal state; can be passed to setstate() later.""" 187 return self.VERSION, self._seed, self.gauss_next 188 189 def setstate(self, state): 190 """Restore internal state from object returned by getstate().""" 191 version = state[0] 192 if version == 1: 193 version, self._seed, self.gauss_next = state 194 else: 195 raise ValueError("state with version %s passed to " 196 "Random.setstate() of version %s" % 197 (version, self.VERSION)) 198 199 def jumpahead(self, n): 200 """Act as if n calls to random() were made, but quickly. 201 202 n is an int, greater than or equal to 0. 203 204 Example use: If you have 2 threads and know that each will 205 consume no more than a million random numbers, create two Random 206 objects r1 and r2, then do 207 r2.setstate(r1.getstate()) 208 r2.jumpahead(1000000) 209 Then r1 and r2 will use guaranteed-disjoint segments of the full 210 period. 211 """ 212 213 if not n >= 0: 214 raise ValueError("n must be >= 0") 215 x, y, z = self._seed 216 x = int(x * pow(171, n, 30269)) % 30269 217 y = int(y * pow(172, n, 30307)) % 30307 218 z = int(z * pow(170, n, 30323)) % 30323 219 self._seed = x, y, z 220 221 def __whseed(self, x=0, y=0, z=0): 222 """Set the Wichmann-Hill seed from (x, y, z). 223 224 These must be integers in the range [0, 256). 225 """ 226 227 if not type(x) == type(y) == type(z) == type(0): 228 raise TypeError('seeds must be integers') 229 if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): 230 raise ValueError('seeds must be in range(0, 256)') 231 if 0 == x == y == z: 232 # Initialize from current time 233 import time 234 t = long(time.time() * 256) 235 t = int((t&0xffffff) ^ (t>>24)) 236 t, x = divmod(t, 256) 237 t, y = divmod(t, 256) 238 t, z = divmod(t, 256) 239 # Zero is a poor seed, so substitute 1 240 self._seed = (x or 1, y or 1, z or 1) 241 242 self.gauss_next = None 243 244 def whseed(self, a=None): 245 """Seed from hashable object's hash code. 246 247 None or no argument seeds from current time. It is not guaranteed 248 that objects with distinct hash codes lead to distinct internal 249 states. 250 251 This is obsolete, provided for compatibility with the seed routine 252 used prior to Python 2.1. Use the .seed() method instead. 253 """ 254 255 if a is None: 256 self.__whseed() 257 return 258 a = hash(a) 259 a, x = divmod(a, 256) 260 a, y = divmod(a, 256) 261 a, z = divmod(a, 256) 262 x = (x + a) % 256 or 1 263 y = (y + a) % 256 or 1 264 z = (z + a) % 256 or 1 265 self.__whseed(x, y, z) 266 267## ---- Methods below this point do not need to be overridden when 268## ---- subclassing for the purpose of using a different core generator. 269 270## -------------------- pickle support ------------------- 271 272 def __getstate__(self): # for pickle 273 return self.getstate() 274 275 def __setstate__(self, state): # for pickle 276 self.setstate(state) 277 278## -------------------- integer methods ------------------- 279 280 def randrange(self, start, stop=None, step=1, int=int, default=None): 281 """Choose a random item from range(start, stop[, step]). 282 283 This fixes the problem with randint() which includes the 284 endpoint; in Python this is usually not what you want. 285 Do not supply the 'int' and 'default' arguments. 286 """ 287 288 # This code is a bit messy to make it fast for the 289 # common case while still doing adequate error checking 290 istart = int(start) 291 if istart != start: 292 raise ValueError, "non-integer arg 1 for randrange()" 293 if stop is default: 294 if istart > 0: 295 return int(self.random() * istart) 296 raise ValueError, "empty range for randrange()" 297 istop = int(stop) 298 if istop != stop: 299 raise ValueError, "non-integer stop for randrange()" 300 if step == 1: 301 if istart < istop: 302 return istart + int(self.random() * 303 (istop - istart)) 304 raise ValueError, "empty range for randrange()" 305 istep = int(step) 306 if istep != step: 307 raise ValueError, "non-integer step for randrange()" 308 if istep > 0: 309 n = (istop - istart + istep - 1) / istep 310 elif istep < 0: 311 n = (istop - istart + istep + 1) / istep 312 else: 313 raise ValueError, "zero step for randrange()" 314 315 if n <= 0: 316 raise ValueError, "empty range for randrange()" 317 return istart + istep*int(self.random() * n) 318 319 def randint(self, a, b): 320 """Return random integer in range [a, b], including both end points. 321 """ 322 323 return self.randrange(a, b+1) 324 325## -------------------- sequence methods ------------------- 326 327 def choice(self, seq): 328 """Choose a random element from a non-empty sequence.""" 329 return seq[int(self.random() * len(seq))] 330 331 def shuffle(self, x, random=None, int=int): 332 """x, random=random.random -> shuffle list x in place; return None. 333 334 Optional arg random is a 0-argument function returning a random 335 float in [0.0, 1.0); by default, the standard random.random. 336 337 Note that for even rather small len(x), the total number of 338 permutations of x is larger than the period of most random number 339 generators; this implies that "most" permutations of a long 340 sequence can never be generated. 341 """ 342 343 if random is None: 344 random = self.random 345 for i in xrange(len(x)-1, 0, -1): 346 # pick an element in x[:i+1] with which to exchange x[i] 347 j = int(random() * (i+1)) 348 x[i], x[j] = x[j], x[i] 349 350## -------------------- real-valued distributions ------------------- 351 352## -------------------- uniform distribution ------------------- 353 354 def uniform(self, a, b): 355 """Get a random number in the range [a, b).""" 356 return a + (b-a) * self.random() 357 358## -------------------- normal distribution -------------------- 359 360 def normalvariate(self, mu, sigma): 361 # mu = mean, sigma = standard deviation 362 363 # Uses Kinderman and Monahan method. Reference: Kinderman, 364 # A.J. and Monahan, J.F., "Computer generation of random 365 # variables using the ratio of uniform deviates", ACM Trans 366 # Math Software, 3, (1977), pp257-260. 367 368 random = self.random 369 while 1: 370 u1 = random() 371 u2 = random() 372 z = NV_MAGICCONST*(u1-0.5)/u2 373 zz = z*z/4.0 374 if zz <= -_log(u2): 375 break 376 return mu + z*sigma 377 378## -------------------- lognormal distribution -------------------- 379 380 def lognormvariate(self, mu, sigma): 381 return _exp(self.normalvariate(mu, sigma)) 382 383## -------------------- circular uniform -------------------- 384 385 def cunifvariate(self, mean, arc): 386 # mean: mean angle (in radians between 0 and pi) 387 # arc: range of distribution (in radians between 0 and pi) 388 389 return (mean + arc * (self.random() - 0.5)) % _pi 390 391## -------------------- exponential distribution -------------------- 392 393 def expovariate(self, lambd): 394 # lambd: rate lambd = 1/mean 395 # ('lambda' is a Python reserved word) 396 397 random = self.random 398 u = random() 399 while u <= 1e-7: 400 u = random() 401 return -_log(u)/lambd 402 403## -------------------- von Mises distribution -------------------- 404 405 def vonmisesvariate(self, mu, kappa): 406 # mu: mean angle (in radians between 0 and 2*pi) 407 # kappa: concentration parameter kappa (>= 0) 408 # if kappa = 0 generate uniform random angle 409 410 # Based upon an algorithm published in: Fisher, N.I., 411 # "Statistical Analysis of Circular Data", Cambridge 412 # University Press, 1993. 413 414 # Thanks to Magnus Kessler for a correction to the 415 # implementation of step 4. 416 417 random = self.random 418 if kappa <= 1e-6: 419 return TWOPI * random() 420 421 a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa) 422 b = (a - _sqrt(2.0 * a))/(2.0 * kappa) 423 r = (1.0 + b * b)/(2.0 * b) 424 425 while 1: 426 u1 = random() 427 428 z = _cos(_pi * u1) 429 f = (1.0 + r * z)/(r + z) 430 c = kappa * (r - f) 431 432 u2 = random() 433 434 if not (u2 >= c * (2.0 - c) and u2 > c * _exp(1.0 - c)): 435 break 436 437 u3 = random() 438 if u3 > 0.5: 439 theta = (mu % TWOPI) + _acos(f) 440 else: 441 theta = (mu % TWOPI) - _acos(f) 442 443 return theta 444 445## -------------------- gamma distribution -------------------- 446 447 def gammavariate(self, alpha, beta): 448 449 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 450 451 # Warning: a few older sources define the gamma distribution in terms 452 # of alpha > -1.0 453 if alpha <= 0.0 or beta <= 0.0: 454 raise ValueError, 'gammavariate: alpha and beta must be > 0.0' 455 456 random = self.random 457 if alpha > 1.0: 458 459 # Uses R.C.H. Cheng, "The generation of Gamma 460 # variables with non-integral shape parameters", 461 # Applied Statistics, (1977), 26, No. 1, p71-74 462 463 ainv = _sqrt(2.0 * alpha - 1.0) 464 bbb = alpha - LOG4 465 ccc = alpha + ainv 466 467 while 1: 468 u1 = random() 469 u2 = random() 470 v = _log(u1/(1.0-u1))/ainv 471 x = alpha*_exp(v) 472 z = u1*u1*u2 473 r = bbb+ccc*v-x 474 if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z): 475 return x * beta 476 477 elif alpha == 1.0: 478 # expovariate(1) 479 u = random() 480 while u <= 1e-7: 481 u = random() 482 return -_log(u) * beta 483 484 else: # alpha is between 0 and 1 (exclusive) 485 486 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle 487 488 while 1: 489 u = random() 490 b = (_e + alpha)/_e 491 p = b*u 492 if p <= 1.0: 493 x = pow(p, 1.0/alpha) 494 else: 495 # p > 1 496 x = -_log((b-p)/alpha) 497 u1 = random() 498 if not (((p <= 1.0) and (u1 > _exp(-x))) or 499 ((p > 1) and (u1 > pow(x, alpha - 1.0)))): 500 break 501 return x * beta 502 503 504 def stdgamma(self, alpha, ainv, bbb, ccc): 505 # This method was (and shall remain) undocumented. 506 # This method is deprecated 507 # for the following reasons: 508 # 1. Returns same as .gammavariate(alpha, 1.0) 509 # 2. Requires caller to provide 3 extra arguments 510 # that are functions of alpha anyway 511 # 3. Can't be used for alpha < 0.5 512 513 # ainv = sqrt(2 * alpha - 1) 514 # bbb = alpha - log(4) 515 # ccc = alpha + ainv 516 import warnings 517 warnings.warn("The stdgamma function is deprecated; " 518 "use gammavariate() instead", 519 DeprecationWarning) 520 return self.gammavariate(alpha, 1.0) 521 522 523 524## -------------------- Gauss (faster alternative) -------------------- 525 526 def gauss(self, mu, sigma): 527 528 # When x and y are two variables from [0, 1), uniformly 529 # distributed, then 530 # 531 # cos(2*pi*x)*sqrt(-2*log(1-y)) 532 # sin(2*pi*x)*sqrt(-2*log(1-y)) 533 # 534 # are two *independent* variables with normal distribution 535 # (mu = 0, sigma = 1). 536 # (Lambert Meertens) 537 # (corrected version; bug discovered by Mike Miller, fixed by LM) 538 539 # Multithreading note: When two threads call this function 540 # simultaneously, it is possible that they will receive the 541 # same return value. The window is very small though. To 542 # avoid this, you have to use a lock around all calls. (I 543 # didn't want to slow this down in the serial case by using a 544 # lock here.) 545 546 random = self.random 547 z = self.gauss_next 548 self.gauss_next = None 549 if z is None: 550 x2pi = random() * TWOPI 551 g2rad = _sqrt(-2.0 * _log(1.0 - random())) 552 z = _cos(x2pi) * g2rad 553 self.gauss_next = _sin(x2pi) * g2rad 554 555 return mu + z*sigma 556 557## -------------------- beta -------------------- 558## See 559## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470 560## for Ivan Frohne's insightful analysis of why the original implementation: 561## 562## def betavariate(self, alpha, beta): 563## # Discrete Event Simulation in C, pp 87-88. 564## 565## y = self.expovariate(alpha) 566## z = self.expovariate(1.0/beta) 567## return z/(y+z) 568## 569## was dead wrong, and how it probably got that way. 570 571 def betavariate(self, alpha, beta): 572 # This version due to Janne Sinkkonen, and matches all the std 573 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution"). 574 y = self.gammavariate(alpha, 1.) 575 if y == 0: 576 return 0.0 577 else: 578 return y / (y + self.gammavariate(beta, 1.)) 579 580## -------------------- Pareto -------------------- 581 582 def paretovariate(self, alpha): 583 # Jain, pg. 495 584 585 u = self.random() 586 return 1.0 / pow(u, 1.0/alpha) 587 588## -------------------- Weibull -------------------- 589 590 def weibullvariate(self, alpha, beta): 591 # Jain, pg. 499; bug fix courtesy Bill Arms 592 593 u = self.random() 594 return alpha * pow(-_log(u), 1.0/beta) 595 596## -------------------- test program -------------------- 597 598def _test_generator(n, funccall): 599 import time 600 print n, 'times', funccall 601 code = compile(funccall, funccall, 'eval') 602 sum = 0.0 603 sqsum = 0.0 604 smallest = 1e10 605 largest = -1e10 606 t0 = time.time() 607 for i in range(n): 608 x = eval(code) 609 sum = sum + x 610 sqsum = sqsum + x*x 611 smallest = min(x, smallest) 612 largest = max(x, largest) 613 t1 = time.time() 614 print round(t1-t0, 3), 'sec,', 615 avg = sum/n 616 stddev = _sqrt(sqsum/n - avg*avg) 617 print 'avg %g, stddev %g, min %g, max %g' % \ 618 (avg, stddev, smallest, largest) 619 620def _test(N=20000): 621 print 'TWOPI =', TWOPI 622 print 'LOG4 =', LOG4 623 print 'NV_MAGICCONST =', NV_MAGICCONST 624 print 'SG_MAGICCONST =', SG_MAGICCONST 625 _test_generator(N, 'random()') 626 _test_generator(N, 'normalvariate(0.0, 1.0)') 627 _test_generator(N, 'lognormvariate(0.0, 1.0)') 628 _test_generator(N, 'cunifvariate(0.0, 1.0)') 629 _test_generator(N, 'expovariate(1.0)') 630 _test_generator(N, 'vonmisesvariate(0.0, 1.0)') 631 _test_generator(N, 'gammavariate(0.01, 1.0)') 632 _test_generator(N, 'gammavariate(0.1, 1.0)') 633 _test_generator(N, 'gammavariate(0.1, 2.0)') 634 _test_generator(N, 'gammavariate(0.5, 1.0)') 635 _test_generator(N, 'gammavariate(0.9, 1.0)') 636 _test_generator(N, 'gammavariate(1.0, 1.0)') 637 _test_generator(N, 'gammavariate(2.0, 1.0)') 638 _test_generator(N, 'gammavariate(20.0, 1.0)') 639 _test_generator(N, 'gammavariate(200.0, 1.0)') 640 _test_generator(N, 'gauss(0.0, 1.0)') 641 _test_generator(N, 'betavariate(3.0, 3.0)') 642 _test_generator(N, 'paretovariate(1.0)') 643 _test_generator(N, 'weibullvariate(1.0, 1.0)') 644 645 # Test jumpahead. 646 s = getstate() 647 jumpahead(N) 648 r1 = random() 649 # now do it the slow way 650 setstate(s) 651 for i in range(N): 652 random() 653 r2 = random() 654 if r1 != r2: 655 raise ValueError("jumpahead test failed " + `(N, r1, r2)`) 656 657# Create one instance, seeded from current time, and export its methods 658# as module-level functions. The functions are not threadsafe, and state 659# is shared across all uses (both in the user's code and in the Python 660# libraries), but that's fine for most programs and is easier for the 661# casual user than making them instantiate their own Random() instance. 662_inst = Random() 663seed = _inst.seed 664random = _inst.random 665uniform = _inst.uniform 666randint = _inst.randint 667choice = _inst.choice 668randrange = _inst.randrange 669shuffle = _inst.shuffle 670normalvariate = _inst.normalvariate 671lognormvariate = _inst.lognormvariate 672cunifvariate = _inst.cunifvariate 673expovariate = _inst.expovariate 674vonmisesvariate = _inst.vonmisesvariate 675gammavariate = _inst.gammavariate 676stdgamma = _inst.stdgamma 677gauss = _inst.gauss 678betavariate = _inst.betavariate 679paretovariate = _inst.paretovariate 680weibullvariate = _inst.weibullvariate 681getstate = _inst.getstate 682setstate = _inst.setstate 683jumpahead = _inst.jumpahead 684whseed = _inst.whseed 685 686if __name__ == '__main__': 687 _test() 688