# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
40d39863d8578fb1c6e7077b2c7cc5ca79edcf4ddHan Shen"""Define a type that wraps a Benchmark instance."""
5f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Ticefrom __future__ import print_function
60dcbc4b1714260820fd4b8d6536fbb05e139cc0fAhmad Sharif
78332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huangimport math
88332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huangfrom scipy import stats
98332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang
# Estimated standard deviation of each benchmark's results, keyed by benchmark
# name. _samples() uses these to decide how many iterations to run when the
# caller does not ask for a specific count; benchmarks missing from this table
# have no estimate. See crbug.com/673558 for how these are estimated.
# NOTE(review): _samples() divides these values by a relative error bound
# (2%), so they are presumably fractions of the mean rather than absolute
# values -- confirm against the bug above.
_estimated_stddev = {
    'octane': 0.015,
    'kraken': 0.019,
    'speedometer': 0.007,
    'dromaeo.domcoreattr': 0.023,
    'dromaeo.domcoremodify': 0.011,
    'smoothness.tough_webgl_cases': 0.025,
    'graphics_WebGLAquarium': 0.008,
    'page_cycler_v2.typical_25': 0.021,
}
218332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang
22f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice
238332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang# Get #samples needed to guarantee a given confidence interval, assuming the
248332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang# samples follow normal distribution.
def _samples(b, p=0.9, e=0.02):
  """Return the number of samples needed to reach a confidence interval.

  The samples are assumed to follow a normal distribution whose standard
  deviation is the value recorded for the benchmark in _estimated_stddev.

  Args:
    b: Benchmark name used to look up the estimated standard deviation.
    p: Confidence level, strictly between 0 and 1. The default of 0.9 together
      with the default e means a 90% chance that
      |sample mean - true mean| < 2%.
    e: Allowed deviation of the sample mean from the true mean, expressed on
      the same scale as the values in _estimated_stddev. Must be positive.

  Returns:
    1 if there is no estimated standard deviation for b. Otherwise the
    computed sample count, raised to at least 2 so that a standard deviation
    can be calculated, which is needed in T-test for p-value.

  Raises:
    ValueError: If p is not strictly between 0 and 1, or e is not positive.
  """
  # Reject values that would otherwise surface as an obscure OverflowError
  # (p == 1 gives an infinite critical value) or ZeroDivisionError (e == 0).
  if not 0 < p < 1:
    raise ValueError('p must be strictly between 0 and 1, got %r' % (p,))
  if not e > 0:
    raise ValueError('e must be positive, got %r' % (e,))

  try:
    d = _estimated_stddev[b]
  except KeyError:
    # No estimate available: fall back to a single run.
    return 1

  # Two-sided critical value of the standard normal distribution for
  # confidence level p.
  z = stats.norm.isf((1 - p) / 2.0)
  n = int(math.ceil((z * d / e)**2))
  return max(n, 2)
37f6ef4395fe1896ba68c80e52cb24763b0fcfe7f8Caroline Tice
388332364c0237ca6c4976c5206346ab9a596c8e98Ting-Yuan Huang
class Benchmark(object):
  """Description of a single benchmark that is going to be run.

  Holds the benchmark suite, the arguments handed to the suite, how many
  iterations to run, and related settings. The benchmark name does not have to
  match the test suite name: two benchmarks may run the same test_name with
  different arguments.
  """

  def __init__(self, name, test_name, test_args, iterations, rm_chroot_tmp,
               perf_args, suite='', show_all_results=False, retries=0,
               run_local=False):
    """Store the benchmark settings.

    Args:
      name: Name of the benchmark.
      test_name: Name of the test; for telemetry, this is the benchmark name.
      test_args: Arguments for the test; for telemetry, this is the data.
      iterations: Number of iterations. When not greater than 0, the count is
        taken from _samples(name) instead.
      rm_chroot_tmp: Stored as given on the instance.
      perf_args: Stored as given on the instance.
      suite: Name of the suite the benchmark belongs to.
      show_all_results: Stored as given, except that it is forced to True when
        suite is 'telemetry'.
      retries: Stored as given on the instance.
      run_local: Only accepted when suite is 'telemetry_Crosperf'.

    Raises:
      RuntimeError: If run_local is set for any other suite.
    """
    self.name = name
    self.test_name = test_name
    self.test_args = test_args

    # A non-positive iteration count means "pick a suitable number for me".
    if iterations > 0:
      self.iterations = iterations
    else:
      self.iterations = _samples(name)

    self.perf_args = perf_args
    self.rm_chroot_tmp = rm_chroot_tmp
    self.iteration_adjusted = False
    self.suite = suite
    self.retries = retries

    # The 'telemetry' suite always shows all results.
    self.show_all_results = True if suite == 'telemetry' else show_all_results

    local_run_unsupported = run_local and suite != 'telemetry_Crosperf'
    if local_run_unsupported:
      raise RuntimeError('run_local is only supported by telemetry_Crosperf.')
    self.run_local = run_local
77