random.py revision 8ac1495a6a1d18111a626cec0c7f2eb67df3edb3
1"""Random variable generators.
2
3    integers
4    --------
5           uniform within range
6
7    sequences
8    ---------
9           pick random element
10           generate random permutation
11
12    distributions on the real line:
13    ------------------------------
14           uniform
15           normal (Gaussian)
16           lognormal
17           negative exponential
18           gamma
19           beta
20
21    distributions on the circle (angles 0 to 2pi)
22    ---------------------------------------------
23           circular uniform
24           von Mises
25
26Translated from anonymously contributed C/C++ source.
27
28Multi-threading note:  the random number generator used here is not thread-
29safe; it is possible that two calls return the same random value.  However,
30you can instantiate a different instance of Random() in each thread to get
31generators that don't share state, then use .setstate() and .jumpahead() to
32move the generators to disjoint segments of the full period.  For example,
33
34def create_generators(num, delta, firstseed=None):
35    ""\"Return list of num distinct generators.
36    Each generator has its own unique segment of delta elements from
37    Random.random()'s full period.
38    Seed the first generator with optional arg firstseed (default is
39    None, to seed from current time).
40    ""\"
41
42    from random import Random
43    g = Random(firstseed)
44    result = [g]
45    for i in range(num - 1):
46        laststate = g.getstate()
47        g = Random()
48        g.setstate(laststate)
49        g.jumpahead(delta)
50        result.append(g)
51    return result
52
53gens = create_generators(10, 1000000)
54
55That creates 10 distinct generators, which can be passed out to 10 distinct
56threads.  The generators don't share state so can be called safely in
57parallel.  So long as no thread calls its g.random() more than a million
58times (the second argument to create_generators), the sequences seen by
59each thread will not overlap.
60
61The period of the underlying Wichmann-Hill generator is 6,953,607,871,644,
62and that limits how far this technique can be pushed.
63
64Just for fun, note that since we know the period, .jumpahead() can also be
65used to "move backward in time":
66
67>>> g = Random(42)  # arbitrary
68>>> g.random()
690.25420336316883324
70>>> g.jumpahead(6953607871644L - 1) # move *back* one
71>>> g.random()
720.25420336316883324
73"""
74# XXX The docstring sucks.
75
76from math import log as _log, exp as _exp, pi as _pi, e as _e
77from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
78
79__all__ = ["Random","seed","random","uniform","randint","choice",
80           "randrange","shuffle","normalvariate","lognormvariate",
81           "cunifvariate","expovariate","vonmisesvariate","gammavariate",
82           "stdgamma","gauss","betavariate","paretovariate","weibullvariate",
83           "getstate","setstate","jumpahead","whseed"]
84
85def _verify(name, computed, expected):
86    if abs(computed - expected) > 1e-7:
87        raise ValueError(
88            "computed value for %s deviates too much "
89            "(computed %g, expected %g)" % (name, computed, expected))
90
91NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
92_verify('NV_MAGICCONST', NV_MAGICCONST, 1.71552776992141)
93
94TWOPI = 2.0*_pi
95_verify('TWOPI', TWOPI, 6.28318530718)
96
97LOG4 = _log(4.0)
98_verify('LOG4', LOG4, 1.38629436111989)
99
100SG_MAGICCONST = 1.0 + _log(4.5)
101_verify('SG_MAGICCONST', SG_MAGICCONST, 2.50407739677627)
102
103del _verify
104
105# Translated by Guido van Rossum from C source provided by
106# Adrian Baddeley.
107
108class Random:
109
110    VERSION = 1     # used by getstate/setstate
111
112    def __init__(self, x=None):
113        """Initialize an instance.
114
115        Optional argument x controls seeding, as for Random.seed().
116        """
117
118        self.seed(x)
119
120## -------------------- core generator -------------------
121
122    # Specific to Wichmann-Hill generator.  Subclasses wishing to use a
123    # different core generator should override the seed(), random(),
124    # getstate(), setstate() and jumpahead() methods.
125
126    def seed(self, a=None):
127        """Initialize internal state from hashable object.
128
129        None or no argument seeds from current time.
130
131        If a is not None or an int or long, hash(a) is used instead.
132
133        If a is an int or long, a is used directly.  Distinct values between
134        0 and 27814431486575L inclusive are guaranteed to yield distinct
135        internal states (this guarantee is specific to the default
136        Wichmann-Hill generator).
137        """
138
139        if a is None:
140            # Initialize from current time
141            import time
142            a = long(time.time() * 256)
143
144        if type(a) not in (type(3), type(3L)):
145            a = hash(a)
146
147        a, x = divmod(a, 30268)
148        a, y = divmod(a, 30306)
149        a, z = divmod(a, 30322)
150        self._seed = int(x)+1, int(y)+1, int(z)+1
151
152        self.gauss_next = None
153
154    def random(self):
155        """Get the next random number in the range [0.0, 1.0)."""
156
157        # Wichman-Hill random number generator.
158        #
159        # Wichmann, B. A. & Hill, I. D. (1982)
160        # Algorithm AS 183:
161        # An efficient and portable pseudo-random number generator
162        # Applied Statistics 31 (1982) 188-190
163        #
164        # see also:
165        #        Correction to Algorithm AS 183
166        #        Applied Statistics 33 (1984) 123
167        #
168        #        McLeod, A. I. (1985)
169        #        A remark on Algorithm AS 183
170        #        Applied Statistics 34 (1985),198-200
171
172        # This part is thread-unsafe:
173        # BEGIN CRITICAL SECTION
174        x, y, z = self._seed
175        x = (171 * x) % 30269
176        y = (172 * y) % 30307
177        z = (170 * z) % 30323
178        self._seed = x, y, z
179        # END CRITICAL SECTION
180
181        # Note:  on a platform using IEEE-754 double arithmetic, this can
182        # never return 0.0 (asserted by Tim; proof too long for a comment).
183        return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
184
185    def getstate(self):
186        """Return internal state; can be passed to setstate() later."""
187        return self.VERSION, self._seed, self.gauss_next
188
189    def setstate(self, state):
190        """Restore internal state from object returned by getstate()."""
191        version = state[0]
192        if version == 1:
193            version, self._seed, self.gauss_next = state
194        else:
195            raise ValueError("state with version %s passed to "
196                             "Random.setstate() of version %s" %
197                             (version, self.VERSION))
198
199    def jumpahead(self, n):
200        """Act as if n calls to random() were made, but quickly.
201
202        n is an int, greater than or equal to 0.
203
204        Example use:  If you have 2 threads and know that each will
205        consume no more than a million random numbers, create two Random
206        objects r1 and r2, then do
207            r2.setstate(r1.getstate())
208            r2.jumpahead(1000000)
209        Then r1 and r2 will use guaranteed-disjoint segments of the full
210        period.
211        """
212
213        if not n >= 0:
214            raise ValueError("n must be >= 0")
215        x, y, z = self._seed
216        x = int(x * pow(171, n, 30269)) % 30269
217        y = int(y * pow(172, n, 30307)) % 30307
218        z = int(z * pow(170, n, 30323)) % 30323
219        self._seed = x, y, z
220
221    def __whseed(self, x=0, y=0, z=0):
222        """Set the Wichmann-Hill seed from (x, y, z).
223
224        These must be integers in the range [0, 256).
225        """
226
227        if not type(x) == type(y) == type(z) == type(0):
228            raise TypeError('seeds must be integers')
229        if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
230            raise ValueError('seeds must be in range(0, 256)')
231        if 0 == x == y == z:
232            # Initialize from current time
233            import time
234            t = long(time.time() * 256)
235            t = int((t&0xffffff) ^ (t>>24))
236            t, x = divmod(t, 256)
237            t, y = divmod(t, 256)
238            t, z = divmod(t, 256)
239        # Zero is a poor seed, so substitute 1
240        self._seed = (x or 1, y or 1, z or 1)
241
242        self.gauss_next = None
243
244    def whseed(self, a=None):
245        """Seed from hashable object's hash code.
246
247        None or no argument seeds from current time.  It is not guaranteed
248        that objects with distinct hash codes lead to distinct internal
249        states.
250
251        This is obsolete, provided for compatibility with the seed routine
252        used prior to Python 2.1.  Use the .seed() method instead.
253        """
254
255        if a is None:
256            self.__whseed()
257            return
258        a = hash(a)
259        a, x = divmod(a, 256)
260        a, y = divmod(a, 256)
261        a, z = divmod(a, 256)
262        x = (x + a) % 256 or 1
263        y = (y + a) % 256 or 1
264        z = (z + a) % 256 or 1
265        self.__whseed(x, y, z)
266
267## ---- Methods below this point do not need to be overridden when
268## ---- subclassing for the purpose of using a different core generator.
269
270## -------------------- pickle support  -------------------
271
272    def __getstate__(self): # for pickle
273        return self.getstate()
274
275    def __setstate__(self, state):  # for pickle
276        self.setstate(state)
277
278## -------------------- integer methods  -------------------
279
280    def randrange(self, start, stop=None, step=1, int=int, default=None):
281        """Choose a random item from range(start, stop[, step]).
282
283        This fixes the problem with randint() which includes the
284        endpoint; in Python this is usually not what you want.
285        Do not supply the 'int' and 'default' arguments.
286        """
287
288        # This code is a bit messy to make it fast for the
289        # common case while still doing adequate error checking
290        istart = int(start)
291        if istart != start:
292            raise ValueError, "non-integer arg 1 for randrange()"
293        if stop is default:
294            if istart > 0:
295                return int(self.random() * istart)
296            raise ValueError, "empty range for randrange()"
297        istop = int(stop)
298        if istop != stop:
299            raise ValueError, "non-integer stop for randrange()"
300        if step == 1:
301            if istart < istop:
302                return istart + int(self.random() *
303                                   (istop - istart))
304            raise ValueError, "empty range for randrange()"
305        istep = int(step)
306        if istep != step:
307            raise ValueError, "non-integer step for randrange()"
308        if istep > 0:
309            n = (istop - istart + istep - 1) / istep
310        elif istep < 0:
311            n = (istop - istart + istep + 1) / istep
312        else:
313            raise ValueError, "zero step for randrange()"
314
315        if n <= 0:
316            raise ValueError, "empty range for randrange()"
317        return istart + istep*int(self.random() * n)
318
319    def randint(self, a, b):
320        """Return random integer in range [a, b], including both end points.
321        """
322
323        return self.randrange(a, b+1)
324
325## -------------------- sequence methods  -------------------
326
327    def choice(self, seq):
328        """Choose a random element from a non-empty sequence."""
329        return seq[int(self.random() * len(seq))]
330
331    def shuffle(self, x, random=None, int=int):
332        """x, random=random.random -> shuffle list x in place; return None.
333
334        Optional arg random is a 0-argument function returning a random
335        float in [0.0, 1.0); by default, the standard random.random.
336
337        Note that for even rather small len(x), the total number of
338        permutations of x is larger than the period of most random number
339        generators; this implies that "most" permutations of a long
340        sequence can never be generated.
341        """
342
343        if random is None:
344            random = self.random
345        for i in xrange(len(x)-1, 0, -1):
346            # pick an element in x[:i+1] with which to exchange x[i]
347            j = int(random() * (i+1))
348            x[i], x[j] = x[j], x[i]
349
350## -------------------- real-valued distributions  -------------------
351
352## -------------------- uniform distribution -------------------
353
354    def uniform(self, a, b):
355        """Get a random number in the range [a, b)."""
356        return a + (b-a) * self.random()
357
358## -------------------- normal distribution --------------------
359
360    def normalvariate(self, mu, sigma):
361        # mu = mean, sigma = standard deviation
362
363        # Uses Kinderman and Monahan method. Reference: Kinderman,
364        # A.J. and Monahan, J.F., "Computer generation of random
365        # variables using the ratio of uniform deviates", ACM Trans
366        # Math Software, 3, (1977), pp257-260.
367
368        random = self.random
369        while 1:
370            u1 = random()
371            u2 = random()
372            z = NV_MAGICCONST*(u1-0.5)/u2
373            zz = z*z/4.0
374            if zz <= -_log(u2):
375                break
376        return mu + z*sigma
377
378## -------------------- lognormal distribution --------------------
379
380    def lognormvariate(self, mu, sigma):
381        return _exp(self.normalvariate(mu, sigma))
382
383## -------------------- circular uniform --------------------
384
385    def cunifvariate(self, mean, arc):
386        # mean: mean angle (in radians between 0 and pi)
387        # arc:  range of distribution (in radians between 0 and pi)
388
389        return (mean + arc * (self.random() - 0.5)) % _pi
390
391## -------------------- exponential distribution --------------------
392
393    def expovariate(self, lambd):
394        # lambd: rate lambd = 1/mean
395        # ('lambda' is a Python reserved word)
396
397        random = self.random
398        u = random()
399        while u <= 1e-7:
400            u = random()
401        return -_log(u)/lambd
402
403## -------------------- von Mises distribution --------------------
404
405    def vonmisesvariate(self, mu, kappa):
406        # mu:    mean angle (in radians between 0 and 2*pi)
407        # kappa: concentration parameter kappa (>= 0)
408        # if kappa = 0 generate uniform random angle
409
410        # Based upon an algorithm published in: Fisher, N.I.,
411        # "Statistical Analysis of Circular Data", Cambridge
412        # University Press, 1993.
413
414        # Thanks to Magnus Kessler for a correction to the
415        # implementation of step 4.
416
417        random = self.random
418        if kappa <= 1e-6:
419            return TWOPI * random()
420
421        a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa)
422        b = (a - _sqrt(2.0 * a))/(2.0 * kappa)
423        r = (1.0 + b * b)/(2.0 * b)
424
425        while 1:
426            u1 = random()
427
428            z = _cos(_pi * u1)
429            f = (1.0 + r * z)/(r + z)
430            c = kappa * (r - f)
431
432            u2 = random()
433
434            if not (u2 >= c * (2.0 - c) and u2 > c * _exp(1.0 - c)):
435                break
436
437        u3 = random()
438        if u3 > 0.5:
439            theta = (mu % TWOPI) + _acos(f)
440        else:
441            theta = (mu % TWOPI) - _acos(f)
442
443        return theta
444
445## -------------------- gamma distribution --------------------
446
447    def gammavariate(self, alpha, beta):
448
449        # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
450
451        # Warning: a few older sources define the gamma distribution in terms
452        # of alpha > -1.0
453        if alpha <= 0.0 or beta <= 0.0:
454            raise ValueError, 'gammavariate: alpha and beta must be > 0.0'
455
456        random = self.random
457        if alpha > 1.0:
458
459            # Uses R.C.H. Cheng, "The generation of Gamma
460            # variables with non-integral shape parameters",
461            # Applied Statistics, (1977), 26, No. 1, p71-74
462
463            ainv = _sqrt(2.0 * alpha - 1.0)
464            bbb = alpha - LOG4
465            ccc = alpha + ainv
466
467            while 1:
468                u1 = random()
469                u2 = random()
470                v = _log(u1/(1.0-u1))/ainv
471                x = alpha*_exp(v)
472                z = u1*u1*u2
473                r = bbb+ccc*v-x
474                if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z):
475                    return x * beta
476
477        elif alpha == 1.0:
478            # expovariate(1)
479            u = random()
480            while u <= 1e-7:
481                u = random()
482            return -_log(u) * beta
483
484        else:   # alpha is between 0 and 1 (exclusive)
485
486            # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
487
488            while 1:
489                u = random()
490                b = (_e + alpha)/_e
491                p = b*u
492                if p <= 1.0:
493                    x = pow(p, 1.0/alpha)
494                else:
495                    # p > 1
496                    x = -_log((b-p)/alpha)
497                u1 = random()
498                if not (((p <= 1.0) and (u1 > _exp(-x))) or
499                          ((p > 1)  and  (u1 > pow(x, alpha - 1.0)))):
500                    break
501            return x * beta
502
503
504    def stdgamma(self, alpha, ainv, bbb, ccc):
505        # This method was (and shall remain) undocumented.
506        # This method is deprecated
507        # for the following reasons:
508        # 1. Returns same as .gammavariate(alpha, 1.0)
509        # 2. Requires caller to provide 3 extra arguments
510        #    that are functions of alpha anyway
511        # 3. Can't be used for alpha < 0.5
512
513        # ainv = sqrt(2 * alpha - 1)
514        # bbb = alpha - log(4)
515        # ccc = alpha + ainv
516        import warnings
517        warnings.warn("The stdgamma function is deprecated; "
518                      "use gammavariate() instead",
519                      DeprecationWarning)
520        return self.gammavariate(alpha, 1.0)
521
522
523
524## -------------------- Gauss (faster alternative) --------------------
525
526    def gauss(self, mu, sigma):
527
528        # When x and y are two variables from [0, 1), uniformly
529        # distributed, then
530        #
531        #    cos(2*pi*x)*sqrt(-2*log(1-y))
532        #    sin(2*pi*x)*sqrt(-2*log(1-y))
533        #
534        # are two *independent* variables with normal distribution
535        # (mu = 0, sigma = 1).
536        # (Lambert Meertens)
537        # (corrected version; bug discovered by Mike Miller, fixed by LM)
538
539        # Multithreading note: When two threads call this function
540        # simultaneously, it is possible that they will receive the
541        # same return value.  The window is very small though.  To
542        # avoid this, you have to use a lock around all calls.  (I
543        # didn't want to slow this down in the serial case by using a
544        # lock here.)
545
546        random = self.random
547        z = self.gauss_next
548        self.gauss_next = None
549        if z is None:
550            x2pi = random() * TWOPI
551            g2rad = _sqrt(-2.0 * _log(1.0 - random()))
552            z = _cos(x2pi) * g2rad
553            self.gauss_next = _sin(x2pi) * g2rad
554
555        return mu + z*sigma
556
557## -------------------- beta --------------------
558## See
559## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470
560## for Ivan Frohne's insightful analysis of why the original implementation:
561##
562##    def betavariate(self, alpha, beta):
563##        # Discrete Event Simulation in C, pp 87-88.
564##
565##        y = self.expovariate(alpha)
566##        z = self.expovariate(1.0/beta)
567##        return z/(y+z)
568##
569## was dead wrong, and how it probably got that way.
570
571    def betavariate(self, alpha, beta):
572        # This version due to Janne Sinkkonen, and matches all the std
573        # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
574        y = self.gammavariate(alpha, 1.)
575        if y == 0:
576            return 0.0
577        else:
578            return y / (y + self.gammavariate(beta, 1.))
579
580## -------------------- Pareto --------------------
581
582    def paretovariate(self, alpha):
583        # Jain, pg. 495
584
585        u = self.random()
586        return 1.0 / pow(u, 1.0/alpha)
587
588## -------------------- Weibull --------------------
589
590    def weibullvariate(self, alpha, beta):
591        # Jain, pg. 499; bug fix courtesy Bill Arms
592
593        u = self.random()
594        return alpha * pow(-_log(u), 1.0/beta)
595
596## -------------------- test program --------------------
597
598def _test_generator(n, funccall):
599    import time
600    print n, 'times', funccall
601    code = compile(funccall, funccall, 'eval')
602    sum = 0.0
603    sqsum = 0.0
604    smallest = 1e10
605    largest = -1e10
606    t0 = time.time()
607    for i in range(n):
608        x = eval(code)
609        sum = sum + x
610        sqsum = sqsum + x*x
611        smallest = min(x, smallest)
612        largest = max(x, largest)
613    t1 = time.time()
614    print round(t1-t0, 3), 'sec,',
615    avg = sum/n
616    stddev = _sqrt(sqsum/n - avg*avg)
617    print 'avg %g, stddev %g, min %g, max %g' % \
618              (avg, stddev, smallest, largest)
619
620def _test(N=20000):
621    print 'TWOPI         =', TWOPI
622    print 'LOG4          =', LOG4
623    print 'NV_MAGICCONST =', NV_MAGICCONST
624    print 'SG_MAGICCONST =', SG_MAGICCONST
625    _test_generator(N, 'random()')
626    _test_generator(N, 'normalvariate(0.0, 1.0)')
627    _test_generator(N, 'lognormvariate(0.0, 1.0)')
628    _test_generator(N, 'cunifvariate(0.0, 1.0)')
629    _test_generator(N, 'expovariate(1.0)')
630    _test_generator(N, 'vonmisesvariate(0.0, 1.0)')
631    _test_generator(N, 'gammavariate(0.01, 1.0)')
632    _test_generator(N, 'gammavariate(0.1, 1.0)')
633    _test_generator(N, 'gammavariate(0.1, 2.0)')
634    _test_generator(N, 'gammavariate(0.5, 1.0)')
635    _test_generator(N, 'gammavariate(0.9, 1.0)')
636    _test_generator(N, 'gammavariate(1.0, 1.0)')
637    _test_generator(N, 'gammavariate(2.0, 1.0)')
638    _test_generator(N, 'gammavariate(20.0, 1.0)')
639    _test_generator(N, 'gammavariate(200.0, 1.0)')
640    _test_generator(N, 'gauss(0.0, 1.0)')
641    _test_generator(N, 'betavariate(3.0, 3.0)')
642    _test_generator(N, 'paretovariate(1.0)')
643    _test_generator(N, 'weibullvariate(1.0, 1.0)')
644
645    # Test jumpahead.
646    s = getstate()
647    jumpahead(N)
648    r1 = random()
649    # now do it the slow way
650    setstate(s)
651    for i in range(N):
652        random()
653    r2 = random()
654    if r1 != r2:
655        raise ValueError("jumpahead test failed " + `(N, r1, r2)`)
656
657# Create one instance, seeded from current time, and export its methods
658# as module-level functions.  The functions are not threadsafe, and state
659# is shared across all uses (both in the user's code and in the Python
660# libraries), but that's fine for most programs and is easier for the
661# casual user than making them instantiate their own Random() instance.
662_inst = Random()
663seed = _inst.seed
664random = _inst.random
665uniform = _inst.uniform
666randint = _inst.randint
667choice = _inst.choice
668randrange = _inst.randrange
669shuffle = _inst.shuffle
670normalvariate = _inst.normalvariate
671lognormvariate = _inst.lognormvariate
672cunifvariate = _inst.cunifvariate
673expovariate = _inst.expovariate
674vonmisesvariate = _inst.vonmisesvariate
675gammavariate = _inst.gammavariate
676stdgamma = _inst.stdgamma
677gauss = _inst.gauss
678betavariate = _inst.betavariate
679paretovariate = _inst.paretovariate
680weibullvariate = _inst.weibullvariate
681getstate = _inst.getstate
682setstate = _inst.setstate
683jumpahead = _inst.jumpahead
684whseed = _inst.whseed
685
686if __name__ == '__main__':
687    _test()
688