testing_batch.py revision 7b8b2d11d0ac87ea3a9ecad976b2214e1c680bc5
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Hill climbing unittest.

Part of the Chrome build flags optimization.

Test the best branching hill climbing algorithms, genetic algorithm and
iterative elimination algorithm.
"""

__author__ = 'yuhenglong@google.com (Yuheng Long)'

import multiprocessing
import random
import sys
import unittest

import flags
from flags import Flag
from flags import FlagSet
from genetic_algorithm import GAGeneration
from genetic_algorithm import GATask
from hill_climb_best_neighbor import HillClimbingBestBranch
from iterative_elimination import IterativeEliminationFirstGeneration
import pipeline_process
from steering import Steering
from task import BUILD_STAGE
from task import Task
from task import TEST_STAGE


# The number of flags to be tested.
NUM_FLAGS = 5

# The value range of the flags.
FLAG_RANGES = 10

# The following variables are meta data for the Genetic Algorithm.
STOP_THRESHOLD = 20
NUM_CHROMOSOMES = 10
NUM_TRIALS = 20
MUTATION_RATE = 0.03


def _GenerateRandomRasks(specs):
  """Generate a task that has random values.

  Args:
    specs: A list of spec from which the flag set is created.

  Returns:
    A set containing a task that has random values.
  """

  flag_set = []

  for spec in specs:
    numeric_flag_match = flags.Search(spec)
    if numeric_flag_match:
      # Numeric flags.
      start = int(numeric_flag_match.group('start'))
      end = int(numeric_flag_match.group('end'))

      # start - 1 is used as the "disabled" sentinel, so the flag is only
      # appended when the drawn value falls inside the legal range.
      value = random.randint(start - 1, end - 1)
      if value != start - 1:
        # If the value falls in the range, this flag is enabled.
        flag_set.append(Flag(spec, value))
    else:
      # Boolean flags: enabled with probability 1/2.
      if random.randint(0, 1):
        flag_set.append(Flag(spec))

  return set([Task(FlagSet(flag_set))])


def _GenerateAllFlagsTasks(specs):
  """Generate a task in which all the flags are enabled.

  All the boolean flags in the specs will be enabled and all the numeric flags
  will have the largest legal value.

  Args:
    specs: A list of spec from which the flag set is created.

  Returns:
    A set containing a task that has all flags enabled.
  """

  flag_set = []

  for spec in specs:
    numeric_flag_match = flags.Search(spec)

    # Numeric flags get the largest legal value (end is exclusive); boolean
    # flags are represented with the sentinel value -1.
    if numeric_flag_match:
      value = (int(numeric_flag_match.group('end')) - 1)
    else:
      value = -1
    flag_set.append(Flag(spec, value))

  return set([Task(FlagSet(flag_set))])


def _GenerateNoFlagTask():
  """Generate a task with an empty flag set (all flags disabled)."""
  return set([Task(FlagSet([]))])


def GenerateRandomGATasks(specs, num_tasks, num_trials):
  """Generate a set of tasks for the Genetic Algorithm.

  Args:
    specs: A list of spec from which the flag set is created.
    num_tasks: number of tasks that should be generated.
    num_trials: the maximum number of tries should be attempted to generate the
      set of tasks.

  Returns:
    A set of randomly generated tasks.
  """

  tasks = set([])

  # total_trials counts *consecutive* duplicate draws; it is reset whenever a
  # new unique task is found, so generation only stops after num_trials
  # duplicates in a row (or once num_tasks tasks exist).
  total_trials = 0
  while len(tasks) < num_tasks and total_trials < num_trials:
    new_flag = FlagSet([Flag(spec) for spec in specs if random.randint(0, 1)])
    new_task = GATask(new_flag)

    if new_task in tasks:
      total_trials += 1
    else:
      tasks.add(new_task)
      total_trials = 0

  return tasks


def _GenerateInitialFlags(specs, spec):
  """Generate the flag_set of a task in the flag elimination algorithm.

  Set the value of all the flags to the largest value, except for the flag that
  contains spec.

  For example, if the specs are [-finline-limit=[1-1000], -fstrict-aliasing]
  and the spec is -finline-limit=[1-1000], then the result is
  [-finline-limit=[1-1000]:-finline-limit=998,
   -fstrict-aliasing:-fstrict-aliasing]

  Args:
    specs: an array of specifications from which the result flag_set is
      created. The flag_set contains one and only one flag that contain the
      specification spec.
    spec: The flag containing this spec should have a value that is smaller
      than the highest value the flag can have.

  Returns:
    An array of flags, each of which contains one spec in specs. All the
    values of the flags are the largest values in specs, except the one that
    contains spec.
  """

  flag_set = []
  for other_spec in specs:
    numeric_flag_match = flags.Search(other_spec)
    # Found the spec in the array specs.
    if other_spec == spec:
      # A numeric flag will have a value that is smaller than the largest
      # value (end - 2 instead of end - 1) and a boolean flag will be deleted
      # (nothing is appended).
      if numeric_flag_match:
        end = int(numeric_flag_match.group('end'))
        flag_set.append(flags.Flag(other_spec, end - 2))

      continue

    # other_spec != spec
    if numeric_flag_match:
      # Numeric flag: use the largest legal value.
      end = int(numeric_flag_match.group('end'))
      flag_set.append(flags.Flag(other_spec, end - 1))
      continue

    # Boolean flag: enabled.
    flag_set.append(flags.Flag(other_spec))

  return flag_set


def _GenerateAllIterativeEliminationTasks(specs):
  """Generate the initial tasks for the negative flag elimination algorithm.

  Generate the base line task that turns on all the boolean flags and sets the
  value to be the largest value for the numeric flag.

  For example, if the specs are [-finline-limit=[1-1000], -fstrict-aliasing],
  the base line is [-finline-limit=[1-1000]:-finline-limit=999,
  -fstrict-aliasing:-fstrict-aliasing]

  Generate a set of task, each turns off one of the flag or sets a value that
  is smaller than the largest value for the flag.

  Args:
    specs: an array of specifications from which the result flag_set is
      created.

  Returns:
    An array containing one generation of the initial tasks for the negative
    flag elimination algorithm.
  """

  # The set of tasks to be generated.
  results = set([])
  flag_set = []

  for spec in specs:
    numeric_flag_match = flags.Search(spec)
    if numeric_flag_match:
      # Numeric flag.
      end_value = int(numeric_flag_match.group('end'))
      flag_set.append(flags.Flag(spec, end_value - 1))
      continue

    # Boolean flag.
    flag_set.append(flags.Flag(spec))

  # The base line task that sets all the flags to their largest values.
  parent_task = Task(flags.FlagSet(flag_set))
  results.add(parent_task)

  # One task per spec, each with that single spec "eliminated" (turned off or
  # lowered below its maximum).
  for spec in specs:
    results.add(Task(flags.FlagSet(_GenerateInitialFlags(specs, spec))))

  return [IterativeEliminationFirstGeneration(results, parent_task)]


def _ComputeCost(cost_func, specs, flag_set):
  """Compute the mock cost of the flag_set using the input cost function.

  Args:
    cost_func: The cost function which is used to compute the mock cost of a
      dictionary of flags.
    specs: All the specs that are used in the algorithm. This is used to check
      whether certain flag is disabled in the flag_set dictionary.
    flag_set: a dictionary of the spec and flag pairs.

  Returns:
    The mock cost of the input dictionary of the flags.
  """

  values = []

  for spec in specs:
    # If a flag is enabled, its value is added. Otherwise a padding 0 is added.
    values.append(flag_set[spec].GetValue() if spec in flag_set else 0)

  # The cost function string can use the values array.
  # NOTE: eval is only acceptable here because cost_func is authored by this
  # test module itself; never pass untrusted input through this function.
  return eval(cost_func)


def _GenerateTestFlags(num_flags, upper_bound, file_name):
  """Generate a set of mock flags and write it to a configuration file.

  Args:
    num_flags: Number of numeric flags to be generated.
    upper_bound: The value of the upper bound of the range.
    file_name: The configuration file name into which the mock flags are put.
  """

  with open(file_name, 'w') as output_file:
    num_flags = int(num_flags)
    upper_bound = int(upper_bound)
    for i in range(num_flags):
      output_file.write('%s=[1-%d]\n' % (i, upper_bound))


def _TestAlgorithm(cost_func, specs, generations, best_result):
  """Test the best result the algorithm should return.

  Set up the framework, run the input algorithm and verify the result.

  Args:
    cost_func: The cost function which is used to compute the mock cost of a
      dictionary of flags.
    specs: All the specs that are used in the algorithm. This is used to check
      whether certain flag is disabled in the flag_set dictionary.
    generations: The initial generations to be evaluated.
    best_result: The expected best result of the algorithm. If best_result is
      -1, the algorithm may or may not return the best value. Therefore, no
      assertion will be inserted.
  """

  # Set up the utilities to test the framework.
  manager = multiprocessing.Manager()
  input_queue = manager.Queue()
  output_queue = manager.Queue()
  pp_steer = multiprocessing.Process(target=Steering,
                                     args=(set(), generations, output_queue,
                                           input_queue))
  pp_steer.start()

  # The best result of the algorithm so far.
  # sys.maxsize replaces the Python-2-only sys.maxint so this also runs on
  # Python 3; it serves the same "larger than any real cost" sentinel role.
  result = sys.maxsize

  while True:
    task = input_queue.get()

    # POISONPILL signals the end of the algorithm.
    if task == pipeline_process.POISONPILL:
      break

    task.SetResult(BUILD_STAGE, (0, 0, 0, 0, 0))

    # Compute the mock cost for the task.
    task_result = _ComputeCost(cost_func, specs, task.GetFlags())
    task.SetResult(TEST_STAGE, task_result)

    # If the mock result of the current task is the best so far, set this
    # result to be the best result.
    if task_result < result:
      result = task_result

    output_queue.put(task)

  pp_steer.join()

  # Only do this test when best_result is not -1.
  if best_result != -1:
    assert best_result == result


class MockAlgorithmsTest(unittest.TestCase):
  """This class mock tests different steering algorithms.

  The steering algorithms are responsible for generating the next set of tasks
  to run in each iteration. This class does a functional testing on the
  algorithms. It mocks out the computation of the fitness function from the
  build and test phases by letting the user define the fitness function.
  """

  def _GenerateFlagSpecifications(self):
    """Generate the testing specifications."""

    mock_test_file = 'scale_mock_test'
    _GenerateTestFlags(NUM_FLAGS, FLAG_RANGES, mock_test_file)
    return flags.ReadConf(mock_test_file)

  def testBestHillClimb(self):
    """Test the best hill climb algorithm.

    Test whether it finds the best results as expected.
    """

    # Initiate the build/test command and the log directory.
    Task.InitLogCommand(None, None, 'output')

    # Generate the testing specs.
    specs = self._GenerateFlagSpecifications()

    # Generate the initial generations for a test whose cost function is the
    # summation of the values of all the flags.
    generation_tasks = _GenerateAllFlagsTasks(specs)
    generations = [HillClimbingBestBranch(generation_tasks, set([]), specs)]

    # Test the algorithm. The cost function is the summation of all the values
    # of all the flags. Therefore, the best value is supposed to be 0, i.e.,
    # when all the flags are disabled.
    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, 0)

    # This test uses a cost function that is the negative of the previous cost
    # function. Therefore, the best result should be found in the task with
    # all the flags enabled.
    cost_function = 'sys.maxsize - sum(values[0:len(values)])'
    all_flags = list(generation_tasks)[0].GetFlags()
    cost = _ComputeCost(cost_function, specs, all_flags)

    # Generate the initial generations.
    generation_tasks = _GenerateNoFlagTask()
    generations = [HillClimbingBestBranch(generation_tasks, set([]), specs)]

    # Test the algorithm. The cost function decreases as flag values grow, so
    # the best value is the cost of the task with all the flags enabled.
    _TestAlgorithm(cost_function, specs, generations, cost)

  def testGeneticAlgorithm(self):
    """Test the Genetic Algorithm.

    Do a functional testing here and see how well it scales.
    """

    # Initiate the build/test command and the log directory.
    Task.InitLogCommand(None, None, 'output')

    # Generate the testing specs.
    specs = self._GenerateFlagSpecifications()
    # Initiate the meta data for the Genetic Algorithm.
    GAGeneration.InitMetaData(STOP_THRESHOLD, NUM_CHROMOSOMES, NUM_TRIALS,
                              specs, MUTATION_RATE)

    # Generate the initial generations.
    generation_tasks = GenerateRandomGATasks(specs, NUM_CHROMOSOMES,
                                             NUM_TRIALS)
    generations = [GAGeneration(generation_tasks, set([]), 0)]

    # Test the algorithm. GA is stochastic, so -1 means "do not assert on the
    # best result".
    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, -1)
    cost_func = 'sys.maxsize - sum(values[0:len(values)])'
    _TestAlgorithm(cost_func, specs, generations, -1)

  def testIterativeElimination(self):
    """Test the iterative elimination algorithm.

    Test whether it finds the best results as expected.
    """

    # Initiate the build/test command and the log directory.
    Task.InitLogCommand(None, None, 'output')

    # Generate the testing specs.
    specs = self._GenerateFlagSpecifications()

    # Generate the initial generations. The generation contains the base line
    # task that turns on all the flags and tasks that each turn off one of the
    # flags.
    generations = _GenerateAllIterativeEliminationTasks(specs)

    # Test the algorithm. The cost function is the summation of all the values
    # of all the flags. Therefore, the best value is supposed to be 0, i.e.,
    # when all the flags are disabled.
    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, 0)

    # This test uses a cost function that is the negative of the previous cost
    # function. Therefore, the best result should be found in the task with
    # all the flags enabled.
    all_flags_tasks = _GenerateAllFlagsTasks(specs)
    cost_function = 'sys.maxsize - sum(values[0:len(values)])'
    # Compute the cost of the task that turns on all the flags.
    all_flags = list(all_flags_tasks)[0].GetFlags()
    cost = _ComputeCost(cost_function, specs, all_flags)

    # Test the algorithm. The cost function decreases as flag values grow, so
    # the best value is the cost of the task with all the flags enabled.
    # The concrete type of the generation decides how the next generation will
    # be generated.
    _TestAlgorithm(cost_function, specs, generations, cost)


if __name__ == '__main__':
  unittest.main()