genetic_algorithm.py revision e896dfd76014af3c399d1b54be022fb1663a105b
1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""The hill genetic algorithm.
6
7Part of the Chrome build flags optimization.
8"""
9
10__author__ = 'yuhenglong@google.com (Yuheng Long)'
11
12import random
13
14import flags
15from flags import Flag
16from flags import FlagSet
17from generation import Generation
18from task import Task
19
20
21def CrossoverWith(first_flag, second_flag):
22  """Get a crossed over gene.
23
24  At present, this just picks either/or of these values.  However, it could be
25  implemented as an integer maskover effort, if required.
26
27  Args:
28    first_flag: The first gene (Flag) to cross over with.
29    second_flag: The second gene (Flag) to cross over with.
30
31  Returns:
32    A Flag that can be considered appropriately randomly blended between the
33    first and second input flag.
34  """
35
36  return first_flag if random.randint(0, 1) else second_flag
37
38
39def RandomMutate(specs, flag_set, mutation_rate):
40  """Randomly mutate the content of a task.
41
42  Args:
43    specs: A list of spec from which the flag set is created.
44    flag_set: The current flag set being mutated
45    mutation_rate: What fraction of genes to mutate.
46
47  Returns:
48    A Genetic Task constructed by randomly mutating the input flag set.
49  """
50
51  results_flags = []
52
53  for spec in specs:
54    # Randomly choose whether this flag should be mutated.
55    if random.randint(0, int(1 / mutation_rate)):
56      continue
57
58    # If the flag is not already in the flag set, it is added.
59    if spec not in flag_set:
60      results_flags.append(Flag(spec))
61      continue
62
63    # If the flag is already in the flag set, it is mutated.
64    numeric_flag_match = flags.Search(spec)
65
66    # The value of a numeric flag will be changed, and a boolean flag will be
67    # dropped.
68    if not numeric_flag_match:
69      continue
70
71    value = flag_set[spec].GetValue()
72
73    # Randomly select a nearby value of the current value of the flag.
74    rand_arr = [value]
75    if value + 1 < int(numeric_flag_match.group('end')):
76      rand_arr.append(value + 1)
77
78    rand_arr.append(value - 1)
79    value = random.sample(rand_arr, 1)[0]
80
81    # If the value is smaller than the start of the spec, this flag will be
82    # dropped.
83    if value != int(numeric_flag_match.group('start')) - 1:
84      results_flags.append(Flag(spec, value))
85
86  return GATask(FlagSet(results_flags))
87
88
89class GATask(Task):
90  def __init__(self, flag_set):
91    Task.__init__(self, flag_set)
92
93  def ReproduceWith(self, other, specs, mutation_rate):
94    """Reproduce with other FlagSet.
95
96    Args:
97      other: A FlagSet to reproduce with.
98      specs: A list of spec from which the flag set is created.
99      mutation_rate: one in mutation_rate flags will be mutated (replaced by a
100        random version of the same flag, instead of one from either of the
101        parents).  Set to 0 to disable mutation.
102
103    Returns:
104      A GA task made by mixing self with other.
105    """
106
107    # Get the flag dictionary.
108    father_flags = self.GetFlags().GetFlags()
109    mother_flags = other.GetFlags().GetFlags()
110
111    # Flags that are common in both parents and flags that belong to only one
112    # parent.
113    self_flags = []
114    other_flags = []
115    common_flags = []
116
117    # Find out flags that are common to both parent and flags that belong soly
118    # to one parent.
119    for self_flag in father_flags:
120      if self_flag in mother_flags:
121        common_flags.append(self_flag)
122      else:
123        self_flags.append(self_flag)
124
125    for other_flag in mother_flags:
126      if other_flag not in father_flags:
127        other_flags.append(other_flag)
128
129    # Randomly select flags that belong to only one parent.
130    output_flags = [father_flags[f] for f in self_flags if random.randint(0, 1)]
131    others = [mother_flags[f] for f in other_flags if random.randint(0, 1)]
132    output_flags.extend(others)
133    # Turn on flags that belong to both parent. Randomly choose the value of the
134    # flag from either parent.
135    for flag in common_flags:
136      output_flags.append(CrossoverWith(father_flags[flag], mother_flags[flag]))
137
138    # Mutate flags
139    if mutation_rate:
140      return RandomMutate(specs, FlagSet(output_flags), mutation_rate)
141
142    return GATask(FlagSet(output_flags))
143
144
145class GAGeneration(Generation):
146  """The Genetic Algorithm."""
147
148  # The value checks whether the algorithm has converged and arrives at a fixed
149  # point. If STOP_THRESHOLD of generations have not seen any performance
150  # improvement, the Genetic Algorithm stops.
151  STOP_THRESHOLD = None
152
153  # Number of tasks in each generation.
154  NUM_CHROMOSOMES = None
155
156  # The value checks whether the algorithm has converged and arrives at a fixed
157  # point. If NUM_TRIALS of trials have been attempted to generate a new task
158  # without a success, the Genetic Algorithm stops.
159  NUM_TRIALS = None
160
161  # The flags that can be used to generate new tasks.
162  SPECS = None
163
164  # What fraction of genes to mutate.
165  MUTATION_RATE = 0
166
167  @staticmethod
168  def InitMetaData(stop_threshold, num_chromosomes, num_trials, specs,
169                   mutation_rate):
170    """Set up the meta data for the Genetic Algorithm.
171
172    Args:
173      stop_threshold: The number of generations, upon which no performance has
174        seen, the Genetic Algorithm stops.
175      num_chromosomes: Number of tasks in each generation.
176      num_trials: The number of trials, upon which new task has been tried to
177        generated without success, the Genetic Algorithm stops.
178      specs: The flags that can be used to generate new tasks.
179      mutation_rate: What fraction of genes to mutate.
180    """
181
182    GAGeneration.STOP_THRESHOLD = stop_threshold
183    GAGeneration.NUM_CHROMOSOMES = num_chromosomes
184    GAGeneration.NUM_TRIALS = num_trials
185    GAGeneration.SPECS = specs
186    GAGeneration.MUTATION_RATE = mutation_rate
187
188  def __init__(self, tasks, parents, total_stucks):
189    """Set up the meta data for the Genetic Algorithm.
190
191    Args:
192      tasks: A set of tasks to be run.
193      parents: A set of tasks from which this new generation is produced. This
194        set also contains the best tasks generated so far.
195      total_stucks: The number of generations that have not seen improvement.
196        The Genetic Algorithm will stop once the total_stucks equals to
197        NUM_TRIALS defined in the GAGeneration class.
198    """
199
200    Generation.__init__(self, tasks, parents)
201    self._total_stucks = total_stucks
202
203  def IsImproved(self):
204    """True if this generation has improvement upon its parent generation."""
205
206    tasks = self.Pool()
207    parents = self.CandidatePool()
208
209    # The first generate does not have parents.
210    if not parents:
211      return True
212
213    # Found out whether a task has improvement upon the best task in the
214    # parent generation.
215    best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0]
216    best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0]
217
218    # At least one task has improvement.
219    if best_current.IsImproved(best_parent):
220      self._total_stucks = 0
221      return True
222
223    # If STOP_THRESHOLD of generations have no improvement, the algorithm stops.
224    if self._total_stucks >= GAGeneration.STOP_THRESHOLD:
225      return False
226
227    self._total_stucks += 1
228    return True
229
230  def Next(self, cache):
231    """Calculate the next generation.
232
233    Generate a new generation from the a set of tasks. This set contains the
234      best set seen so far and the tasks executed in the parent generation.
235
236    Args:
237      cache: A set of tasks that have been generated before.
238
239    Returns:
240      A set of new generations.
241    """
242
243    target_len = GAGeneration.NUM_CHROMOSOMES
244    specs = GAGeneration.SPECS
245    mutation_rate = GAGeneration.MUTATION_RATE
246
247    # Collect a set of size target_len of tasks. This set will be used to
248    # produce a new generation of tasks.
249    gen_tasks = [task for task in self.Pool()]
250
251    parents = self.CandidatePool()
252    if parents:
253      gen_tasks.extend(parents)
254
255    # A set of tasks that are the best. This set will be used as the parent
256    # generation to produce the next generation.
257    sort_func = lambda task: task.GetTestResult()
258    retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len]
259
260    child_pool = set()
261    for father in retained_tasks:
262      num_trials = 0
263      # Try num_trials times to produce a new child.
264      while num_trials < GAGeneration.NUM_TRIALS:
265        # Randomly select another parent.
266        mother = random.choice(retained_tasks)
267        # Cross over.
268        child = mother.ReproduceWith(father, specs, mutation_rate)
269        if child not in child_pool and child not in cache:
270          child_pool.add(child)
271          break
272        else:
273          num_trials += 1
274
275    num_trials = 0
276
277    while len(child_pool) < target_len and num_trials < GAGeneration.NUM_TRIALS:
278      for keep_task in retained_tasks:
279        # Mutation.
280        child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate)
281        if child not in child_pool and child not in cache:
282          child_pool.add(child)
283          if len(child_pool) >= target_len:
284            break
285        else:
286          num_trials += 1
287
288    # If NUM_TRIALS of tries have been attempted without generating a set of new
289    # tasks, the algorithm stops.
290    if num_trials >= GAGeneration.NUM_TRIALS:
291      return []
292
293    assert len(child_pool) == target_len
294
295    return [GAGeneration(child_pool, set(retained_tasks), self._total_stucks)]
296