1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)# Copyright 2014 The Chromium Authors. All rights reserved.
24e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
34e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)# found in the LICENSE file.
44e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
54e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)"""A collection of statistical utility functions to be used by metrics."""
64e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
74e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)import math
84e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
94e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Clamp(value, low=0.0, high=1.0):
114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Clamp a value between some low and high value."""
124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return min(max(value, low), high)
134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def NormalizeSamples(samples):
164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Sorts the samples, and map them linearly to the range [0,1].
174e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
184e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  They're mapped such that for the N samples, the first sample is 0.5/N and the
194e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  last sample is (N-0.5)/N.
204e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
214e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,
224e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case
234e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  we don't want to distinguish between these two cases, as our original domain
244e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  is not bounded (it is for Monte Carlo integration, where discrepancy was
254e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  first used).
264e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """
274e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if not samples:
284e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return samples, 1.0
294e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  samples = sorted(samples)
304e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  low = min(samples)
314e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  high = max(samples)
324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  new_low = 0.5 / len(samples)
334e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  new_high = (len(samples)-0.5) / len(samples)
344e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if high-low == 0.0:
35e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    return [0.5] * len(samples), 1.0
364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  scale = (new_high - new_low) / (high - low)
374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  for i in xrange(0, len(samples)):
384e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    samples[i] = float(samples[i] - low) * scale + new_low
394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return samples, scale
404e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
414e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
42e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdochdef Discrepancy(samples, location_count=None):
434e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Computes the discrepancy of a set of 1D samples from the interval [0,1].
444e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
45e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  The samples must be sorted. We define the discrepancy of an empty set
46e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  of samples to be zero.
474e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
484e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  http://en.wikipedia.org/wiki/Low-discrepancy_sequence
494e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  http://mathworld.wolfram.com/Discrepancy.html
504e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """
514e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if not samples:
52e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    return 0.0
534e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  max_local_discrepancy = 0
55e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  inv_sample_count = 1.0 / len(samples)
564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  locations = []
574e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  # For each location, stores the number of samples less than that location.
58e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  count_less = []
594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  # For each location, stores the number of samples less than or equal to that
604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  # location.
61e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  count_less_equal = []
62e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch
63e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  if location_count:
64e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    # Generate list of equally spaced locations.
65e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    sample_index = 0
66e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    for i in xrange(0, int(location_count)):
67e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      location = float(i) / (location_count-1)
68e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      locations.append(location)
69e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      while sample_index < len(samples) and samples[sample_index] < location:
70e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch        sample_index += 1
71e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less.append(sample_index)
72e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      while  sample_index < len(samples) and samples[sample_index] <= location:
73e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch        sample_index += 1
74e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less_equal.append(sample_index)
75e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  else:
76e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    # Populate locations with sample positions. Append 0 and 1 if necessary.
77e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    if samples[0] > 0.0:
78e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      locations.append(0.0)
79e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less.append(0)
80e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less_equal.append(0)
81e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    for i in xrange(0, len(samples)):
82e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      locations.append(samples[i])
83e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less.append(i)
84e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less_equal.append(i+1)
85e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    if samples[-1] < 1.0:
86e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      locations.append(1.0)
87e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less.append(len(samples))
88e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_less_equal.append(len(samples))
894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
904e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  # Iterate over the intervals defined by any pair of locations.
914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  for i in xrange(0, len(locations)):
92e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    for j in xrange(i+1, len(locations)):
93e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      # Length of interval
944e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      length = locations[j] - locations[i]
954e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
96e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      # Local discrepancy for closed interval
97e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_closed = count_less_equal[j] - count_less[i]
98e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      local_discrepancy_closed = abs(float(count_closed) *
99e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch                                     inv_sample_count - length)
100e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      max_local_discrepancy = max(local_discrepancy_closed,
101e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch                                  max_local_discrepancy)
102e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch
103e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      # Local discrepancy for open interval
104e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      count_open = count_less[j] - count_less_equal[i]
105e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      local_discrepancy_open = abs(float(count_open) *
106e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch                                   inv_sample_count - length)
107e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      max_local_discrepancy = max(local_discrepancy_open,
108e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch                                  max_local_discrepancy)
1094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return max_local_discrepancy
1114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
113a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def TimestampsDiscrepancy(timestamps, absolute=True,
114e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch                          location_count=None):
115a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """A discrepancy based metric for measuring timestamp jank.
1164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
117a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  TimestampsDiscrepancy quantifies the largest area of jank observed in a series
118a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  of timestamps.  Note that this is different from metrics based on the
119a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and
120a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  B = [0,1,2,3,5,7] have the same max_time_interval = 2, but
1214e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Discrepancy(B) > Discrepancy(A).
1224e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1234e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Two variants of discrepancy can be computed:
1244e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1254e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Relative discrepancy is following the original definition of
1264e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  discrepancy. It characterized the largest area of jank, relative to the
1274e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  duration of the entire time stamp series.  We normalize the raw results,
1284e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  because the best case discrepancy for a set of N samples is 1/N (for
1294e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  equally spaced samples), and we want our metric to report 0.0 in that
1304e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  case.
1314e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Absolute discrepancy also characterizes the largest area of jank, but its
1334e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  value wouldn't change (except for imprecisions due to a low
134a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  |interval_multiplier|) if additional 'good' intervals were added to an
1354e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  exisiting list of time stamps.  Its range is [0,inf] and the unit is
1364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  milliseconds.
1374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1384e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
1394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  absolute discrepancy, but D has lower relative discrepancy than C.
1405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
141a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each
1425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  S_i is a time stamp series. In that case, the discrepancy D(S) is:
1435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  D(S) = max(D(S_1), D(S_2), ..., D(S_N))
1444e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """
145a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if not timestamps:
146e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    return 0.0
1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
148a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if isinstance(timestamps[0], list):
149a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps]
1505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    return max(range_discrepancies)
1515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
152a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  samples, sample_scale = NormalizeSamples(timestamps)
153e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  discrepancy = Discrepancy(samples, location_count)
1544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  inv_sample_count = 1.0 / len(samples)
1554e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if absolute:
1564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    # Compute absolute discrepancy
1574e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    discrepancy /= sample_scale
1584e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  else:
1594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    # Compute relative discrepancy
1604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))
1614e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return discrepancy
1624e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1634e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
164a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def DurationsDiscrepancy(durations, absolute=True,
165e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch                         location_count=None):
166a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """A discrepancy based metric for measuring duration jank.
167a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
168a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  DurationsDiscrepancy computes a jank metric which measures how irregular a
169a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  given sequence of intervals is. In order to minimize jank, each duration
170a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  should be equally long. This is similar to how timestamp jank works,
171a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  and we therefore reuse the timestamp discrepancy function above to compute a
172a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  similar duration discrepancy number.
173a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
174a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  Because timestamp discrepancy is defined in terms of timestamps, we first
175a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  convert the list of durations to monotonically increasing timestamps.
176a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
177a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  Args:
178a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    durations: List of interval lengths in milliseconds.
179a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    absolute: See TimestampsDiscrepancy.
180a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    interval_multiplier: See TimestampsDiscrepancy.
181a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """
182e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  if not durations:
183e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch    return 0.0
184e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch
185e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0])
186e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  return TimestampsDiscrepancy(timestamps, absolute, location_count)
1874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
188a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
189a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def ArithmeticMean(data):
190a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """Calculates arithmetic mean.
1914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1924e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Args:
193a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    data: A list of samples.
1944e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1954e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Returns:
196a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    The arithmetic mean value, or 0 if the list is empty.
1974e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """
198a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  numerator_total = Total(data)
199a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  denominator_total = Total(len(data))
2004e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return DivideIfPossibleOrZero(numerator_total, denominator_total)
2014e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2024e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
203a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def StandardDeviation(data):
204a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """Calculates the standard deviation.
205a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
206a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  Args:
207a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    data: A list of samples.
208a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
209a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  Returns:
210a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    The standard deviation of the samples provided.
211a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """
212a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if len(data) == 1:
213a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return 0.0
214a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
215a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  mean = ArithmeticMean(data)
216a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  variances = [float(x) - mean for x in data]
217a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  variances = [x * x for x in variances]
218a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  std_dev = math.sqrt(ArithmeticMean(variances))
219a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
220a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return std_dev
221a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
222a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
223a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def TrapezoidalRule(data, dx):
224a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """ Calculate the integral according to the trapezoidal rule
225a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
226a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  TrapezoidalRule approximates the definite integral of f from a to b by
227a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  the composite trapezoidal rule, using n subintervals.
228a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  http://en.wikipedia.org/wiki/Trapezoidal_rule#Uniform_grid
229a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
230a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  Args:
231a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    data: A list of samples
232a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    dx: The uniform distance along the x axis between any two samples
233a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
234a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  Returns:
235a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    The area under the curve defined by the samples and the uniform distance
236a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    according to the trapezoidal rule.
237a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  """
238a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
239a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  n = len(data) - 1
240a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  s = data[0] + data[n]
241a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
242a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if n == 0:
243a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return 0.0
244a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
245a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  for i in range(1, n):
246a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    s += 2 * data[i]
247a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
248a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return s * dx / 2.0
249a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
2504e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Total(data):
2514e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Returns the float value of a number or the sum of a list."""
2524e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if type(data) == float:
2534e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    total = data
2544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  elif type(data) == int:
2554e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    total = float(data)
2564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  elif type(data) == list:
2574e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    total = float(sum(data))
2584e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  else:
2594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    raise TypeError
2604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return total
2614e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2624e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2634e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def DivideIfPossibleOrZero(numerator, denominator):
2644e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Returns the quotient, or zero if the denominator is zero."""
2654e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if not denominator:
2664e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return 0.0
2674e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  else:
2684e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return numerator / denominator
2694e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2704e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2714e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def GeneralizedMean(values, exponent):
2724e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """See http://en.wikipedia.org/wiki/Generalized_mean"""
2734e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if not values:
2744e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return 0.0
2754e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  sum_of_powers = 0.0
2764e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  for v in values:
2774e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    sum_of_powers += v ** exponent
2784e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return (sum_of_powers / len(values)) ** (1.0/exponent)
2794e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2804e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Median(values):
2824e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Gets the median of a list of values."""
2834e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  return Percentile(values, 50)
2844e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2854e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2864e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Percentile(values, percentile):
2874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """Calculates the value below which a given percentage of values fall.
2884e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  For example, if 17% of the values are less than 5.0, then 5.0 is the 17th
2904e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  percentile for this set of values. When the percentage doesn't exactly
2914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  match a rank in the list of values, the percentile is computed using linear
2924e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  interpolation between closest ranks.
2934e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2944e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Args:
2954e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    values: A list of numerical values.
2964e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    percentile: A number between 0 and 100.
2974e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2984e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  Returns:
2994e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    The Nth percentile for the list of values, where N is the given percentage.
3004e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  """
3014e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if not values:
3024e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return 0.0
3034e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  sorted_values = sorted(values)
3044e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  n = len(values)
3054e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  percentile /= 100.0
3064e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  if percentile <= 0.5 / n:
3074e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return sorted_values[0]
3084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  elif percentile >= (n - 0.5) / n:
3094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return sorted_values[-1]
3104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  else:
3114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    floor_index = int(math.floor(n * percentile -  0.5))
3124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    floor_value = sorted_values[floor_index]
3134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    ceil_value = sorted_values[floor_index+1]
3144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    alpha = n * percentile - 0.5 - floor_index
3154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return floor_value + alpha * (ceil_value - floor_value)
3164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
317f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
318f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)def GeometricMean(values):
319f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  """Compute a rounded geometric mean from an array of values."""
320f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  if not values:
321f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return None
322f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  # To avoid infinite value errors, make sure no value is less than 0.001.
323f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  new_values = []
324f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for value in values:
325f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if value > 0.001:
326f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      new_values.append(value)
327f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    else:
328f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      new_values.append(0.001)
329f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  # Compute the sum of the log of the values.
330f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  log_sum = sum(map(math.log, new_values))
331f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  # Raise e to that sum over the number of values.
332f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  mean = math.pow(math.e, (log_sum / len(new_values)))
333f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  # Return the rounded mean.
334f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  return int(round(mean))
335