104dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. 204dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang# Use of this source code is governed by a BSD-style license that can be 304dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang# found in the LICENSE file. 40d39863d8578fb1c6e7077b2c7cc5ca79edcf4ddHan Shen"""Define a type that wraps a Benchmark instance.""" 5f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Ticefrom __future__ import print_function 60dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif 78332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huangimport math 88332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huangfrom scipy import stats 98332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 108332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang# See crbug.com/673558 for how these are estimated. 118332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang_estimated_stddev = { 128332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'octane': 0.015, 138332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'kraken': 0.019, 148332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'speedometer': 0.007, 158332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'dromaeo.domcoreattr': 0.023, 168332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'dromaeo.domcoremodify': 0.011, 178332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'smoothness.tough_webgl_cases': 0.025, 188332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'graphics_WebGLAquarium': 0.008, 198332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 'page_cycler_v2.typical_25': 0.021, 208332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang} 218332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 22f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice 238332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang# Get #samples needed to guarantee a given confidence interval, assuming the 248332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang# samples follow normal distribution. 258332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huangdef _samples(b): 26f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice # TODO: Make this an option 27f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%. 28f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice p = 0.9 29f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice e = 0.02 30f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice if b not in _estimated_stddev: 31f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice return 1 32f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice d = _estimated_stddev[b] 33f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice # Get at least 2 samples so as to calculate standard deviation, which is 34f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice # needed in T-test for p-value. 35f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice n = int(math.ceil((stats.norm.isf((1 - p) / 2) * d / e)**2)) 36f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice return n if n > 1 else 2 37f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice 388332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang 390dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharifclass Benchmark(object): 400dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif """Class representing a benchmark to be run. 410dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif 4204dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang Contains details of the benchmark suite, arguments to pass to the suite, 4304dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang iterations to run the benchmark suite and so on. Note that the benchmark name 4404dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang can be different to the test suite name. For example, you may want to have 4504dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang two different benchmarks which run the same test_name with different 460dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif arguments. 470dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif """ 480dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif 49f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano def __init__(self, 50f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano name, 51f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano test_name, 52f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano test_args, 53f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano iterations, 54f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano rm_chroot_tmp, 55f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano perf_args, 56f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano suite='', 57f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano show_all_results=False, 58f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano retries=0, 59f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano run_local=False): 600dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif self.name = name 6104dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang #For telemetry, this is the benchmark name. 6204dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang self.test_name = test_name 6304dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang #For telemetry, this is the data. 6404dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang self.test_args = test_args 658332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang self.iterations = iterations if iterations > 0 else _samples(name) 66f395c26437cbdabc2960447fba89b226f4409e82Ahmad Sharif self.perf_args = perf_args 67f81680c018729fd4499e1e200d04b48c4b90127cLuis Lozano self.rm_chroot_tmp = rm_chroot_tmp 684467f004e7f0854963bec90daff1879fbd9d2fecAhmad Sharif self.iteration_adjusted = False 6904dc5dc8547dbfbe524cf35ac39537346ad749bbYunlian Jiang self.suite = suite 709847df92a2b5f76ccddc4bf10288819712a8ca47cmtice self.show_all_results = show_all_results 71df76222bf1cbdbc42bb41934fb960bac8694eca4Luis Lozano self.retries = retries 72f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbeLuis Lozano if self.suite == 'telemetry': 73226e3e08106dffd3086a7eee7007950a1d8ddfafcmtice self.show_all_results = True 74bc2d3d13d2e1928217140a76acdf9f9917b1fa30Ting-Yuan Huang if run_local and self.suite != 'telemetry_Crosperf': 759099a788cd7124024559c064e425ed9caef6e0acCaroline Tice raise RuntimeError('run_local is only supported by telemetry_Crosperf.') 76bc2d3d13d2e1928217140a76acdf9f9917b1fa30Ting-Yuan Huang self.run_local = run_local 77