1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)# Copyright 2014 The Chromium Authors. All rights reserved. 24e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be 34e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)# found in the LICENSE file. 44e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 54e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)"""A collection of statistical utility functions to be used by metrics.""" 64e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 74e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)import math 84e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 94e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Clamp(value, low=0.0, high=1.0): 114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Clamp a value between some low and high value.""" 124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return min(max(value, low), high) 134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def NormalizeSamples(samples): 164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Sorts the samples, and map them linearly to the range [0,1]. 174e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 184e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) They're mapped such that for the N samples, the first sample is 0.5/N and the 194e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) last sample is (N-0.5)/N. 204e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 214e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N, 224e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case 234e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) we don't want to distinguish between these two cases, as our original domain 244e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) is not bounded (it is for Monte Carlo integration, where discrepancy was 254e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) first used). 264e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """ 274e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if not samples: 284e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return samples, 1.0 294e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) samples = sorted(samples) 304e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) low = min(samples) 314e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) high = max(samples) 324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) new_low = 0.5 / len(samples) 334e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) new_high = (len(samples)-0.5) / len(samples) 344e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if high-low == 0.0: 35e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch return [0.5] * len(samples), 1.0 364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) scale = (new_high - new_low) / (high - low) 374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) for i in xrange(0, len(samples)): 384e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) samples[i] = float(samples[i] - low) * scale + new_low 394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return samples, scale 404e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 414e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 42e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdochdef Discrepancy(samples, location_count=None): 434e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Computes the discrepancy of a set of 1D samples from the interval [0,1]. 444e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 45e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch The samples must be sorted. We define the discrepancy of an empty set 46e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch of samples to be zero. 474e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 484e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) http://en.wikipedia.org/wiki/Low-discrepancy_sequence 494e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) http://mathworld.wolfram.com/Discrepancy.html 504e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """ 514e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if not samples: 52e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch return 0.0 534e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) max_local_discrepancy = 0 55e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch inv_sample_count = 1.0 / len(samples) 564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) locations = [] 574e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) # For each location, stores the number of samples less than that location. 58e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less = [] 594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) # For each location, stores the number of samples less than or equal to that 604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) # location. 61e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less_equal = [] 62e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch 63e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch if location_count: 64e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch # Generate list of equally spaced locations. 65e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch sample_index = 0 66e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch for i in xrange(0, int(location_count)): 67e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch location = float(i) / (location_count-1) 68e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch locations.append(location) 69e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch while sample_index < len(samples) and samples[sample_index] < location: 70e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch sample_index += 1 71e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less.append(sample_index) 72e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch while sample_index < len(samples) and samples[sample_index] <= location: 73e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch sample_index += 1 74e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less_equal.append(sample_index) 75e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch else: 76e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch # Populate locations with sample positions. Append 0 and 1 if necessary. 77e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch if samples[0] > 0.0: 78e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch locations.append(0.0) 79e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less.append(0) 80e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less_equal.append(0) 81e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch for i in xrange(0, len(samples)): 82e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch locations.append(samples[i]) 83e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less.append(i) 84e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less_equal.append(i+1) 85e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch if samples[-1] < 1.0: 86e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch locations.append(1.0) 87e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less.append(len(samples)) 88e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_less_equal.append(len(samples)) 894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 904e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) # Iterate over the intervals defined by any pair of locations. 914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) for i in xrange(0, len(locations)): 92e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch for j in xrange(i+1, len(locations)): 93e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch # Length of interval 944e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) length = locations[j] - locations[i] 954e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 96e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch # Local discrepancy for closed interval 97e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_closed = count_less_equal[j] - count_less[i] 98e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch local_discrepancy_closed = abs(float(count_closed) * 99e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch inv_sample_count - length) 100e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch max_local_discrepancy = max(local_discrepancy_closed, 101e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch max_local_discrepancy) 102e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch 103e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch # Local discrepancy for open interval 104e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch count_open = count_less[j] - count_less_equal[i] 105e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch local_discrepancy_open = abs(float(count_open) * 106e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch inv_sample_count - length) 107e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch max_local_discrepancy = max(local_discrepancy_open, 108e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch max_local_discrepancy) 1094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return max_local_discrepancy 1114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 113a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def TimestampsDiscrepancy(timestamps, absolute=True, 114e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch location_count=None): 115a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """A discrepancy based metric for measuring timestamp jank. 1164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 117a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) TimestampsDiscrepancy quantifies the largest area of jank observed in a series 118a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) of timestamps. Note that this is different from metrics based on the 119a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and 120a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) B = [0,1,2,3,5,7] have the same max_time_interval = 2, but 1214e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Discrepancy(B) > Discrepancy(A). 1224e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1234e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Two variants of discrepancy can be computed: 1244e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1254e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Relative discrepancy is following the original definition of 1264e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) discrepancy. It characterized the largest area of jank, relative to the 1274e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) duration of the entire time stamp series. We normalize the raw results, 1284e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) because the best case discrepancy for a set of N samples is 1/N (for 1294e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) equally spaced samples), and we want our metric to report 0.0 in that 1304e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) case. 1314e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Absolute discrepancy also characterizes the largest area of jank, but its 1334e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) value wouldn't change (except for imprecisions due to a low 134a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) |interval_multiplier|) if additional 'good' intervals were added to an 1354e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) exisiting list of time stamps. Its range is [0,inf] and the unit is 1364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) milliseconds. 1374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1384e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same 1394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) absolute discrepancy, but D has lower relative discrepancy than C. 1405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 141a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each 1425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) S_i is a time stamp series. In that case, the discrepancy D(S) is: 1435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) D(S) = max(D(S_1), D(S_2), ..., D(S_N)) 1444e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """ 145a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if not timestamps: 146e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch return 0.0 1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 148a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if isinstance(timestamps[0], list): 149a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps] 1505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return max(range_discrepancies) 1515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 152a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) samples, sample_scale = NormalizeSamples(timestamps) 153e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch discrepancy = Discrepancy(samples, location_count) 1544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) inv_sample_count = 1.0 / len(samples) 1554e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if absolute: 1564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) # Compute absolute discrepancy 1574e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) discrepancy /= sample_scale 1584e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) else: 1594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) # Compute relative discrepancy 1604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count)) 1614e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return discrepancy 1624e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1634e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 164a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def DurationsDiscrepancy(durations, absolute=True, 165e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch location_count=None): 166a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """A discrepancy based metric for measuring duration jank. 167a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 168a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DurationsDiscrepancy computes a jank metric which measures how irregular a 169a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) given sequence of intervals is. In order to minimize jank, each duration 170a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) should be equally long. This is similar to how timestamp jank works, 171a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) and we therefore reuse the timestamp discrepancy function above to compute a 172a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) similar duration discrepancy number. 173a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 174a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) Because timestamp discrepancy is defined in terms of timestamps, we first 175a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) convert the list of durations to monotonically increasing timestamps. 176a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 177a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) Args: 178a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) durations: List of interval lengths in milliseconds. 179a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) absolute: See TimestampsDiscrepancy. 180a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) interval_multiplier: See TimestampsDiscrepancy. 181a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """ 182e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch if not durations: 183e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch return 0.0 184e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch 185e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0]) 186e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch return TimestampsDiscrepancy(timestamps, absolute, location_count) 1874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 188a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 189a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def ArithmeticMean(data): 190a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """Calculates arithmetic mean. 1914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1924e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Args: 193a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) data: A list of samples. 1944e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1954e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Returns: 196a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) The arithmetic mean value, or 0 if the list is empty. 1974e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """ 198a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) numerator_total = Total(data) 199a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) denominator_total = Total(len(data)) 2004e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return DivideIfPossibleOrZero(numerator_total, denominator_total) 2014e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2024e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 203a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def StandardDeviation(data): 204a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """Calculates the standard deviation. 205a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 206a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) Args: 207a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) data: A list of samples. 208a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 209a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) Returns: 210a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) The standard deviation of the samples provided. 211a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """ 212a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if len(data) == 1: 213a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return 0.0 214a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 215a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) mean = ArithmeticMean(data) 216a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) variances = [float(x) - mean for x in data] 217a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) variances = [x * x for x in variances] 218a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) std_dev = math.sqrt(ArithmeticMean(variances)) 219a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 220a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return std_dev 221a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 222a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 223a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)def TrapezoidalRule(data, dx): 224a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """ Calculate the integral according to the trapezoidal rule 225a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 226a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) TrapezoidalRule approximates the definite integral of f from a to b by 227a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) the composite trapezoidal rule, using n subintervals. 228a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) http://en.wikipedia.org/wiki/Trapezoidal_rule#Uniform_grid 229a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 230a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) Args: 231a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) data: A list of samples 232a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) dx: The uniform distance along the x axis between any two samples 233a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 234a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) Returns: 235a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) The area under the curve defined by the samples and the uniform distance 236a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) according to the trapezoidal rule. 237a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) """ 238a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 239a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) n = len(data) - 1 240a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) s = data[0] + data[n] 241a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 242a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if n == 0: 243a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return 0.0 244a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 245a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) for i in range(1, n): 246a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) s += 2 * data[i] 247a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 248a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return s * dx / 2.0 249a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 2504e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Total(data): 2514e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Returns the float value of a number or the sum of a list.""" 2524e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if type(data) == float: 2534e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) total = data 2544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) elif type(data) == int: 2554e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) total = float(data) 2564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) elif type(data) == list: 2574e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) total = float(sum(data)) 2584e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) else: 2594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) raise TypeError 2604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return total 2614e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2624e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2634e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def DivideIfPossibleOrZero(numerator, denominator): 2644e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Returns the quotient, or zero if the denominator is zero.""" 2654e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if not denominator: 2664e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return 0.0 2674e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) else: 2684e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return numerator / denominator 2694e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2704e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2714e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def GeneralizedMean(values, exponent): 2724e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """See http://en.wikipedia.org/wiki/Generalized_mean""" 2734e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if not values: 2744e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return 0.0 2754e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) sum_of_powers = 0.0 2764e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) for v in values: 2774e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) sum_of_powers += v ** exponent 2784e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return (sum_of_powers / len(values)) ** (1.0/exponent) 2794e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2804e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Median(values): 2824e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Gets the median of a list of values.""" 2834e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return Percentile(values, 50) 2844e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2854e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2864e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)def Percentile(values, percentile): 2874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """Calculates the value below which a given percentage of values fall. 2884e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) For example, if 17% of the values are less than 5.0, then 5.0 is the 17th 2904e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) percentile for this set of values. When the percentage doesn't exactly 2914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) match a rank in the list of values, the percentile is computed using linear 2924e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) interpolation between closest ranks. 2934e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2944e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Args: 2954e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) values: A list of numerical values. 2964e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) percentile: A number between 0 and 100. 2974e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2984e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Returns: 2994e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) The Nth percentile for the list of values, where N is the given percentage. 3004e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) """ 3014e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if not values: 3024e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return 0.0 3034e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) sorted_values = sorted(values) 3044e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) n = len(values) 3054e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) percentile /= 100.0 3064e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) if percentile <= 0.5 / n: 3074e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return sorted_values[0] 3084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) elif percentile >= (n - 0.5) / n: 3094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return sorted_values[-1] 3104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) else: 3114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) floor_index = int(math.floor(n * percentile - 0.5)) 3124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) floor_value = sorted_values[floor_index] 3134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) ceil_value = sorted_values[floor_index+1] 3144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) alpha = n * percentile - 0.5 - floor_index 3154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) return floor_value + alpha * (ceil_value - floor_value) 3164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 317f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 318f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)def GeometricMean(values): 319f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) """Compute a rounded geometric mean from an array of values.""" 320f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if not values: 321f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return None 322f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) # To avoid infinite value errors, make sure no value is less than 0.001. 323f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) new_values = [] 324f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for value in values: 325f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if value > 0.001: 326f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) new_values.append(value) 327f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) else: 328f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) new_values.append(0.001) 329f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) # Compute the sum of the log of the values. 330f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) log_sum = sum(map(math.log, new_values)) 331f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) # Raise e to that sum over the number of values. 332f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) mean = math.pow(math.e, (log_sum / len(new_values))) 333f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) # Return the rounded mean. 334f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return int(round(mean)) 335