testing_batch.py revision 7b8b2d11d0ac87ea3a9ecad976b2214e1c680bc5
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Hill climbing unittest.

Part of the Chrome build flags optimization.

Tests the best branching hill climbing algorithm, the genetic algorithm and
the iterative elimination algorithm.
"""

__author__ = 'yuhenglong@google.com (Yuheng Long)'

import multiprocessing
import random
import sys
import unittest

import flags
from flags import Flag
from flags import FlagSet
from genetic_algorithm import GAGeneration
from genetic_algorithm import GATask
from hill_climb_best_neighbor import HillClimbingBestBranch
from iterative_elimination import IterativeEliminationFirstGeneration
import pipeline_process
from steering import Steering
from task import BUILD_STAGE
from task import Task
from task import TEST_STAGE


# The number of flags to be tested.
NUM_FLAGS = 5

# The value range of the flags.
FLAG_RANGES = 10

# The following variables are meta data for the Genetic Algorithm.
STOP_THRESHOLD = 20
NUM_CHROMOSOMES = 10
NUM_TRIALS = 20
MUTATION_RATE = 0.03


def _GenerateRandomTasks(specs):
  """Generate a task that has random values.

  Args:
    specs: A list of specs from which the flag set is created.

  Returns:
    A set containing a task that has random values.
  """

  flag_set = []

  for spec in specs:
    numeric_flag_match = flags.Search(spec)
    if numeric_flag_match:
      # Numeric flags.
      start = int(numeric_flag_match.group('start'))
      end = int(numeric_flag_match.group('end'))

      value = random.randint(start - 1, end - 1)
      if value != start - 1:
        # If the value falls in the range, this flag is enabled.
        flag_set.append(Flag(spec, value))
    else:
      # Boolean flags.
      if random.randint(0, 1):
        flag_set.append(Flag(spec))

  return set([Task(FlagSet(flag_set))])


def _GenerateAllFlagsTasks(specs):
  """Generate a task in which all the flags are enabled.

  All the boolean flags in the specs will be enabled and all the numeric flags
  will have their largest legal value.

  Args:
    specs: A list of specs from which the flag set is created.

  Returns:
    A set containing a task that has all flags enabled.
  """

  flag_set = []

  for spec in specs:
    numeric_flag_match = flags.Search(spec)

    if numeric_flag_match:
      value = (int(numeric_flag_match.group('end')) - 1)
    else:
      value = -1
    flag_set.append(Flag(spec, value))

  return set([Task(FlagSet(flag_set))])


def _GenerateNoFlagTask():
  """Generate a task that has no flags enabled."""
  return set([Task(FlagSet([]))])


def GenerateRandomGATasks(specs, num_tasks, num_trials):
  """Generate a set of tasks for the Genetic Algorithm.

  Args:
    specs: A list of specs from which the flag set is created.
    num_tasks: The number of tasks that should be generated.
    num_trials: The maximum number of consecutive attempts that should be made
      to generate a new unique task before giving up.

  Returns:
    A set of randomly generated tasks.
  """

  tasks = set([])

  total_trials = 0
  while len(tasks) < num_tasks and total_trials < num_trials:
    new_flag = FlagSet([Flag(spec) for spec in specs if random.randint(0, 1)])
    new_task = GATask(new_flag)

    if new_task in tasks:
      total_trials += 1
    else:
      tasks.add(new_task)
      total_trials = 0

  return tasks


def _GenerateInitialFlags(specs, spec):
  """Generate the flag_set of a task in the flag elimination algorithm.

  Set the value of all the flags to the largest value, except for the flag that
  contains spec.

  For example, if the specs are [-finline-limit=[1-1000], -fstrict-aliasing] and
  the spec is -finline-limit=[1-1000], then the result is
  [-finline-limit=[1-1000]:-finline-limit=998,
   -fstrict-aliasing:-fstrict-aliasing]

  Args:
    specs: An array of specifications from which the result flag_set is created.
      The flag_set contains one and only one flag that contains the
      specification spec.
    spec: The flag containing this spec should have a value that is smaller than
      the highest value the flag can have.

  Returns:
    An array of flags, each of which contains one spec in specs. All the values
    of the flags are the largest values in specs, except the one that contains
    spec.
  """

  flag_set = []
  for other_spec in specs:
    numeric_flag_match = flags.Search(other_spec)
    # Found the spec in the array specs.
    if other_spec == spec:
      # A numeric flag will get a value that is smaller than the largest value
      # and a boolean flag will be deleted.
      if numeric_flag_match:
        end = int(numeric_flag_match.group('end'))
        flag_set.append(flags.Flag(other_spec, end - 2))

      continue

    # other_spec != spec
    if numeric_flag_match:
      # Numeric flag.
      end = int(numeric_flag_match.group('end'))
      flag_set.append(flags.Flag(other_spec, end - 1))
      continue

    # Boolean flag.
    flag_set.append(flags.Flag(other_spec))

  return flag_set


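# A minimal illustrative sketch of _GenerateInitialFlags, mirroring the example
# in its docstring; it is not exercised by the tests and exists only for
# documentation. With the two example specs below and spec set to
# '-finline-limit=[1-1000]', the numeric flag gets the value 998 (one below its
# largest legal value of 999) while the boolean flag -fstrict-aliasing stays
# enabled. The spec strings are examples only; any specs accepted by
# flags.Search would do.
def _ExampleInitialFlags():
  specs = ['-finline-limit=[1-1000]', '-fstrict-aliasing']
  return _GenerateInitialFlags(specs, specs[0])

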
def _GenerateAllIterativeEliminationTasks(specs):
  """Generate the initial tasks for the negative flag elimination algorithm.

  Generate the base line task that turns on all the boolean flags and sets the
  value to be the largest value for the numeric flags.

  For example, if the specs are [-finline-limit=[1-1000], -fstrict-aliasing],
  the base line is [-finline-limit=[1-1000]:-finline-limit=999,
  -fstrict-aliasing:-fstrict-aliasing]

  Generate a set of tasks, each of which turns off one of the flags or sets a
  value that is smaller than the largest value for the flag.

  Args:
    specs: An array of specifications from which the result flag_set is created.

  Returns:
    An array containing one generation of the initial tasks for the negative
    flag elimination algorithm.
  """

  # The set of tasks to be generated.
  results = set([])
  flag_set = []

  for spec in specs:
    numeric_flag_match = flags.Search(spec)
    if numeric_flag_match:
      # Numeric flag.
      end_value = int(numeric_flag_match.group('end'))
      flag_set.append(flags.Flag(spec, end_value - 1))
      continue

    # Boolean flag.
    flag_set.append(flags.Flag(spec))

  # The base line task that sets all the flags to their largest values.
  parent_task = Task(flags.FlagSet(flag_set))
  results.add(parent_task)

  for spec in specs:
    results.add(Task(flags.FlagSet(_GenerateInitialFlags(specs, spec))))

  return [IterativeEliminationFirstGeneration(results, parent_task)]


def _ComputeCost(cost_func, specs, flag_set):
  """Compute the mock cost of the flag_set using the input cost function.

  Args:
    cost_func: The cost function which is used to compute the mock cost of a
      dictionary of flags.
    specs: All the specs that are used in the algorithm. This is used to check
      whether a certain flag is disabled in the flag_set dictionary.
    flag_set: A dictionary of the spec and flag pairs.

  Returns:
    The mock cost of the input dictionary of the flags.
  """

  values = []

  for spec in specs:
    # If a flag is enabled, its value is added. Otherwise a padding 0 is added.
    values.append(flag_set[spec].GetValue() if spec in flag_set else 0)

  # The cost function string can use the values array.
  return eval(cost_func)


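# A minimal illustrative sketch of how a cost function string is evaluated by
# _ComputeCost over the 'values' list; it is not exercised by the tests and
# exists only for documentation. The specs and the flag value are made-up
# examples, and the sketch assumes Flag.GetValue() returns the value the flag
# was constructed with.
def _ExampleCostEvaluation():
  specs = ['-finline-limit=[1-1000]', '-fstrict-aliasing']
  # Only the numeric flag is enabled; the missing boolean flag contributes a
  # padding 0 to 'values' inside _ComputeCost.
  flag_set = {specs[0]: Flag(specs[0], 500)}
  # With the summation cost function the mock cost is 500 + 0 = 500.
  return _ComputeCost('sum(values[0:len(values)])', specs, flag_set)

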
def _GenerateTestFlags(num_flags, upper_bound, file_name):
  """Generate a set of mock flags and write it to a configuration file.

  Args:
    num_flags: Number of numeric flags to be generated.
    upper_bound: The value of the upper bound of the range.
    file_name: The configuration file name into which the mock flags are put.
  """

  with open(file_name, 'w') as output_file:
    num_flags = int(num_flags)
    upper_bound = int(upper_bound)
    for i in range(num_flags):
      output_file.write('%s=[1-%d]\n' % (i, upper_bound))


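# For reference: with NUM_FLAGS = 5 and FLAG_RANGES = 10, _GenerateTestFlags
# writes one numeric flag spec per line, e.g.
#
#   0=[1-10]
#   1=[1-10]
#   ...
#   4=[1-10]
#
# and flags.ReadConf later parses this file back into the spec list used by
# the tests below.

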
def _TestAlgorithm(cost_func, specs, generations, best_result):
  """Test the best result the algorithm should return.

  Set up the framework, run the input algorithm and verify the result.

  Args:
    cost_func: The cost function which is used to compute the mock cost of a
      dictionary of flags.
    specs: All the specs that are used in the algorithm. This is used to check
      whether a certain flag is disabled in the flag_set dictionary.
    generations: The initial generations to be evaluated.
    best_result: The expected best result of the algorithm. If best_result is
      -1, the algorithm may or may not return the best value. Therefore, no
      assertion will be inserted.
  """

  # Set up the utilities to test the framework.
  manager = multiprocessing.Manager()
  input_queue = manager.Queue()
  output_queue = manager.Queue()
  pp_steer = multiprocessing.Process(target=Steering,
                                     args=(set(), generations, output_queue,
                                           input_queue))
  pp_steer.start()

  # The best result of the algorithm so far.
  result = sys.maxint

  while True:
    task = input_queue.get()

    # POISONPILL signals the end of the algorithm.
    if task == pipeline_process.POISONPILL:
      break

    task.SetResult(BUILD_STAGE, (0, 0, 0, 0, 0))

    # Compute the mock cost for the task.
    task_result = _ComputeCost(cost_func, specs, task.GetFlags())
    task.SetResult(TEST_STAGE, task_result)

    # If the mock result of the current task is the best so far, set this
    # result to be the best result.
    if task_result < result:
      result = task_result

    output_queue.put(task)

  pp_steer.join()

  # Only do this test when best_result is not -1.
  if best_result != -1:
    assert best_result == result


class MockAlgorithmsTest(unittest.TestCase):
  """This class mock-tests different steering algorithms.

  The steering algorithms are responsible for generating the next set of tasks
  to run in each iteration. This class does functional testing of the
  algorithms. It mocks out the computation of the fitness function from the
  build and test phases by letting the user define the fitness function.
  """

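  # The user-defined fitness functions below are plain Python expression
  # strings evaluated over the 'values' list that _ComputeCost builds, e.g.
  # 'sum(values[0:len(values)])' or 'sys.maxint - sum(values[0:len(values)])'.
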
  def _GenerateFlagSpecifications(self):
    """Generate the testing specifications."""

    mock_test_file = 'scale_mock_test'
    _GenerateTestFlags(NUM_FLAGS, FLAG_RANGES, mock_test_file)
    return flags.ReadConf(mock_test_file)

  def testBestHillClimb(self):
    """Test the best branching hill climb algorithm.

    Test whether it finds the best results as expected.
    """

    # Initialize the build/test command and the log directory.
    Task.InitLogCommand(None, None, 'output')

    # Generate the testing specs.
    specs = self._GenerateFlagSpecifications()

    # Generate the initial generations for a test whose cost function is the
    # summation of the values of all the flags.
    generation_tasks = _GenerateAllFlagsTasks(specs)
    generations = [HillClimbingBestBranch(generation_tasks, set([]), specs)]

    # Test the algorithm. The cost function is the summation of all the values
    # of all the flags. Therefore, the best value is supposed to be 0, i.e.,
    # when all the flags are disabled.
    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, 0)

    # This test uses a cost function that is the negative of the previous cost
    # function. Therefore, the best result should be found in the task with all
    # the flags enabled.
    cost_function = 'sys.maxint - sum(values[0:len(values)])'
    all_flags = list(generation_tasks)[0].GetFlags()
    cost = _ComputeCost(cost_function, specs, all_flags)

    # Generate the initial generations.
    generation_tasks = _GenerateNoFlagTask()
    generations = [HillClimbingBestBranch(generation_tasks, set([]), specs)]

    # Test the algorithm. The cost function is the negative of the summation of
    # all the values of all the flags. Therefore, the best result is expected
    # to be the cost computed above, i.e., when all the flags are enabled.
    _TestAlgorithm(cost_function, specs, generations, cost)

  def testGeneticAlgorithm(self):
    """Test the Genetic Algorithm.

    Do functional testing here and see how well it scales.
    """

    # Initialize the build/test command and the log directory.
    Task.InitLogCommand(None, None, 'output')

    # Generate the testing specs.
    specs = self._GenerateFlagSpecifications()
    # Initialize the meta data for the Genetic Algorithm.
    GAGeneration.InitMetaData(STOP_THRESHOLD, NUM_CHROMOSOMES, NUM_TRIALS,
                              specs, MUTATION_RATE)

    # Generate the initial generations.
    generation_tasks = GenerateRandomGATasks(specs, NUM_CHROMOSOMES,
                                             NUM_TRIALS)
    generations = [GAGeneration(generation_tasks, set([]), 0)]

    # Test the algorithm. The expected best result is -1 because the genetic
    # algorithm is randomized and may not find the global best value, so no
    # assertion is made on the result.
    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, -1)
    cost_func = 'sys.maxint - sum(values[0:len(values)])'
    _TestAlgorithm(cost_func, specs, generations, -1)

  def testIterativeElimination(self):
    """Test the iterative elimination algorithm.

    Test whether it finds the best results as expected.
    """

    # Initialize the build/test command and the log directory.
    Task.InitLogCommand(None, None, 'output')

    # Generate the testing specs.
    specs = self._GenerateFlagSpecifications()

    # Generate the initial generations. The generation contains the base line
    # task that turns on all the flags and tasks that each turn off one of the
    # flags.
    generations = _GenerateAllIterativeEliminationTasks(specs)

    # Test the algorithm. The cost function is the summation of all the values
    # of all the flags. Therefore, the best value is supposed to be 0, i.e.,
    # when all the flags are disabled.
    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, 0)

    # This test uses a cost function that is the negative of the previous cost
    # function. Therefore, the best result should be found in the task with all
    # the flags enabled.
    all_flags_tasks = _GenerateAllFlagsTasks(specs)
    cost_function = 'sys.maxint - sum(values[0:len(values)])'
    # Compute the cost of the task that turns on all the flags.
    all_flags = list(all_flags_tasks)[0].GetFlags()
    cost = _ComputeCost(cost_function, specs, all_flags)

    # Test the algorithm. The cost function is the negative of the summation of
    # all the values of all the flags. Therefore, the best result is expected
    # to be the cost computed above, i.e., when all the flags are enabled.
    # The concrete type of the generation decides how the next generation will
    # be generated.
    _TestAlgorithm(cost_function, specs, generations, cost)

if __name__ == '__main__':
  unittest.main()