#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Contains two functions that identify flaky tests: one shards and repeatedly
runs a test binary to accumulate failure counts, and one runs each test case in
parallel with itself.
"""


import optparse
import os
import re
import subprocess
import time


# Defaults for FindShardingFlakiness().
FF_DATA_SUFFIX = '_flakies'
FF_SLEEP_INTERVAL = 10.0  # Seconds to sleep between runs.
FF_NUM_ITERATIONS = 100
FF_SUPERVISOR_ARGS = ['-r3', '--random-seed']

# Defaults for FindUnaryFlakiness().
FF_OUTPUT_SUFFIX = '_purges'
FF_NUM_PROCS = 20
FF_NUM_REPEATS = 10
FF_TIMEOUT = 600  # Seconds before any still-running processes are terminated.


def FindShardingFlakiness(test_path, data_path, supervisor_args):
  """Finds flaky test cases by sharding and running a test the specified number
  of times. The data file is read at the beginning of each run to find the last
  known counts and is overwritten at the end of each run with the new counts.
  There is an optional sleep interval between runs so the script can be killed
  without losing the data, which is useful for overnight (or weekend!) runs.
  """

  failed_tests = {}
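  # The data file format, as written at the end of this function (the counts
  # here are just illustrative):
  #   42 runs
  #   40 passes
  #   TestSuite.TestName -> 2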
  # Read a previously written data file.
  if os.path.exists(data_path):
    data_file = open(data_path, 'r')
    num_runs = int(data_file.readline().split(' ')[0])
    num_passes = int(data_file.readline().split(' ')[0])
    for line in data_file:
      if line:
        split_line = line.split(' -> ')
        failed_tests[split_line[0]] = int(split_line[1])
    data_file.close()
  # No data file found.
  else:
    num_runs = 0
    num_passes = 0

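  # The sharding supervisor output is expected to include a 'FAILED TESTS:'
  # header on stderr, followed by one failed test name per line; log_lines
  # flips to True once that header has been seen.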
  log_lines = False
  args = ['python', '../sharding_supervisor/sharding_supervisor.py']
  args.extend(supervisor_args + [test_path])
  proc = subprocess.Popen(args, stderr=subprocess.PIPE)

  # Shard the test and collect failures.
  while True:
    line = proc.stderr.readline()
    if not line:
      if proc.poll() is not None:
        break
      continue
    print line.rstrip()
    if log_lines:
      line = line.rstrip()
      if line in failed_tests:
        failed_tests[line] += 1
      else:
        failed_tests[line] = 1
    elif line.find('FAILED TESTS:') >= 0:
      log_lines = True
  num_runs += 1
  if proc.returncode == 0:
    num_passes += 1

  # Write the data file and print results.
  data_file = open(data_path, 'w')
  print '%i runs' % num_runs
  data_file.write('%i runs\n' % num_runs)
  print '%i passes' % num_passes
  data_file.write('%i passes\n' % num_passes)
  for (test, count) in failed_tests.iteritems():
    print '%s -> %i' % (test, count)
    data_file.write('%s -> %i\n' % (test, count))
  data_file.close()


def FindUnaryFlakiness(test_path, output_path, num_procs, num_repeats, timeout):
  """Runs each test case in a given test binary in parallel with itself, to
  flush out tests that hold on to shared resources. The idea is that if a test
  uses a unary (single-instance) resource, then running many instances of that
  test at once will force some of them to fail or time out.
  """

  test_name_regex = r'((\w+/)?\w+\.\w+(/\d+)?)'
  test_start = re.compile(r'\[\s+RUN\s+\] ' + test_name_regex)
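  # The regex matches standard gtest names such as 'Suite.Test', as well as
  # parameterized names like 'Instantiation/Suite.Test/0'.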
  test_list = []

  # Run the test to discover all the test cases.
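  # (This executes the whole binary once; gtest prints a '[ RUN      ] ' line
  # for each test case, which the regex above recognizes.)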
  proc = subprocess.Popen([test_path], stdout=subprocess.PIPE)
  while True:
    line = proc.stdout.readline()
    if not line:
      if proc.poll() is not None:
        break
      continue
    print line.rstrip()
    results = test_start.search(line)
    if results:
      test_list.append(results.group(1))

  failures = []
  index = 0
  total = len(test_list)

  # Run each test case in parallel with itself.
  for test_name in test_list:
    num_fails = 0
    num_terminated = 0
    procs = []
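    # --gtest_filter restricts the run to this single test case and
    # --gtest_repeat reruns it within each process, so num_procs processes each
    # execute the case num_repeats times.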
    args = [test_path, '--gtest_filter=' + test_name,
            '--gtest_repeat=%i' % num_repeats]
    while len(procs) < num_procs:
      procs.append(subprocess.Popen(args))
    seconds = 0
    while procs:
      # Iterate over a copy so finished processes can be removed safely.
      for proc in procs[:]:
        if proc.poll() is not None:
          if proc.returncode != 0:
            num_fails += 1
          procs.remove(proc)
      # Timeout exceeded, kill the remaining processes and make a note.
      if seconds > timeout:
        num_fails += len(procs)
        num_terminated = len(procs)
        while procs:
          procs.pop().terminate()
      time.sleep(1.0)
      seconds += 1
    index += 1
    if num_fails:
      line = '%s: %i failed' % (test_name, num_fails)
      if num_terminated:
        line += ' (%i terminated)' % num_terminated
      failures.append(line)
    print '%s (%i / %i): %i failed' % (test_name, index, total, num_fails)
    time.sleep(1.0)

  # Print the results and write the data file.
  data_file = open(output_path, 'w')
  for line in failures:
    print line
    data_file.write(line + '\n')
  data_file.close()


def main():
  usage = 'usage: %prog test_path'
  parser = optparse.OptionParser(usage=usage)
  options, args = parser.parse_args()
  if not args:
    parser.error('You must specify a path to test!')
  if not os.path.exists(args[0]):
    parser.error('%s does not exist!' % args[0])

  data_path = os.path.basename(args[0]) + FF_DATA_SUFFIX
  output_path = os.path.basename(args[0]) + FF_OUTPUT_SUFFIX

  for i in range(FF_NUM_ITERATIONS):
    FindShardingFlakiness(args[0], data_path, FF_SUPERVISOR_ARGS)
    print 'That was just iteration %i of %i.' % (i + 1, FF_NUM_ITERATIONS)
    time.sleep(FF_SLEEP_INTERVAL)

  FindUnaryFlakiness(
      args[0], output_path, FF_NUM_PROCS, FF_NUM_REPEATS, FF_TIMEOUT)


if __name__ == '__main__':
  main()