buildbot_utils.py revision e58198707a6524ad3f731c61c9d5ef140a342451
1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Utilities for launching and accessing ChromeOS buildbots."""
5
from __future__ import print_function

import base64
import json
import os
import re
import time
import urllib2

# pylint: disable=no-name-in-module
from oauth2client.service_account import ServiceAccountCredentials

from cros_utils import buildbot_json
from cros_utils import command_executer
from cros_utils import logger
20
# Polling schedule; every duration below is expressed in seconds.
# Wait time before the first status poll of a running build (2 hours).
INITIAL_SLEEP_TIME = 2 * 60 * 60
# Time between successive polls of the buildbot (10 minutes).
SLEEP_TIME = 10 * 60
# After this long (8 hours) decide the build is dead or will never finish.
TIME_OUT = 8 * 60 * 60

# Result status values that count as 'ok'.  The numbering was obtained from:
#   https://chromium.googlesource.com/chromium/tools/build/+/
#       master/third_party/buildbot_8_4p1/buildbot/status/results.py
# where 0 is "success", 1 is "warnings" and 6 is "retry".
OK_STATUS = [0, 1, 6]
33
34
class BuildbotTimeout(Exception):
  """Raised when a buildbot operation times out."""
38
39
def ParseReportLog(url, build):
  """Scrape the trybot image name off the Reports log page.

  This takes the URL for a trybot Reports Stage web page,
  and a trybot build type, such as 'daisy-release'.  It
  opens the text version of the web page and parses it looking
  for the trybot artifact name (e.g. something like
  'trybot-daisy-release/R40-6394.0.0-b1389').

  Args:
    url: URL of the Reports stage log page; '/text' is appended and
      'uberchromegw' is replaced by 'chromegw' before it is fetched.
    build: trybot build type; the artifact name is expected to start with
      'trybot-<build>'.

  Returns:
    The artifact name, or '' if no line of the page contains one.  If
    several lines match, the last one wins.
  """
  newurl = (url + '/text').replace('uberchromegw', 'chromegw')
  webpage = urllib2.urlopen(newurl)
  try:
    data = webpage.read()
  finally:
    # Always release the HTTP connection, even if the read fails.
    webpage.close()

  trybot_name = 'trybot-%s' % build
  trybot_image = ''
  for line in data.split('\n'):
    if 'Artifacts' not in line or 'trybot' not in line:
      continue
    start_pos = line.find(trybot_name)
    if start_pos < 0:
      # The line mentions some other trybot; slicing from -1 would
      # yield garbage, so skip it.
      continue
    # The artifact name ends where the storage link begins.
    end_pos = line.find('@https://storage', start_pos)
    if end_pos < 0:
      continue
    trybot_image = line[start_pos:end_pos]

  return trybot_image
64
65
def GetBuildData(buildbot_queue, build_id):
  """Look up the Reports stage log entry of a trybot build.

  Queries the buildbot json api of the tryserver.chromiumos master for
  build number 'build_id' on the queue 'buildbot_queue' (for example
  'daisy-release') and scans the build's 'logs' entries.

  Returns:
    The second element of the first log entry that contains
    'steps/Report/', or '' when the build has no such entry.
  """
  master = buildbot_json.Buildbot('http://chromegw/p/tryserver.chromiumos/')
  queue = master.builders[buildbot_queue]
  log_entries = queue.builds[build_id].data['logs']
  log_names = (entry[1] for entry in log_entries)
  return next((name for name in log_names if 'steps/Report/' in name), '')
83
84
def FindBuildRecordFromLog(description, build_info):
  """Pick our build record out of a list of build records.

  'description' is the special tag that was attached to the job when the
  buildbot was launched.  The first record in 'build_info' whose 'reason'
  field contains that tag is returned; an empty dict is returned when no
  record matches.
  """
  matching = (record for record in build_info
              if description in record['reason'])
  return next(matching, {})
97
98
def GetBuildInfo(file_dir, waterfall_builder):
  """Get all the build records for the trybot builds.

  Args:
    file_dir: directory that contains
      'cros_utils/chromeos-toolchain-credentials.json', the service account
      key file used to authenticate the request.
    waterfall_builder: name of the waterfall builder.  Its suffix
      ('-release', '-gcc-toolchain' or '-llvm-toolchain') selects the
      tryserver builder to query; any other name queries builder ''.

  Returns:
    A list with one parsed JSON build record per entry of the 'builds'
    field of the server reply (at most 100 are requested, including
    builds that are still running).
  """

  builder = ''
  if waterfall_builder.endswith('-release'):
    builder = 'release'
  elif waterfall_builder.endswith('-gcc-toolchain'):
    builder = 'gcc_toolchain'
  elif waterfall_builder.endswith('-llvm-toolchain'):
    builder = 'llvm_toolchain'

  sa_file = os.path.expanduser(
      os.path.join(file_dir, 'cros_utils',
                   'chromeos-toolchain-credentials.json'))
  scopes = ['https://www.googleapis.com/auth/userinfo.email']

  credentials = ServiceAccountCredentials.from_json_keyfile_name(
      sa_file, scopes=scopes)
  url = (
      'https://luci-milo.appspot.com/prpc/milo.Buildbot/GetBuildbotBuildsJSON')

  # NOTE: If we want to get build logs for the main waterfall builders, the
  # 'master' field below should be 'chromeos' instead of 'chromiumos.tryserver'.
  # Builder would be 'amd64-gcc-toolchain' or 'arm-llvm-toolchain', etc.

  body = json.dumps({
      'master': 'chromiumos.tryserver',
      'builder': builder,
      'include_current': True,
      'limit': 100
  })
  access_token = credentials.get_access_token()
  headers = {
      'Accept': 'application/json',
      'Content-Type': 'application/json',
      'Authorization': 'Bearer %s' % access_token.access_token
  }
  request = urllib2.Request(url, body, headers)
  response = urllib2.urlopen(request, timeout=60)
  try:
    # Skip the first 4 bytes of the reply before parsing it as JSON.
    # NOTE(review): presumably this is the pRPC anti-XSSI prefix - confirm.
    response.read(4)
    reply = json.load(response)
  finally:
    # This function is polled repeatedly for hours; do not leak connections.
    response.close()

  # Each build record arrives as a base64-encoded JSON document.
  return [json.loads(base64.b64decode(item['data']))
          for item in reply['builds']]
146
147
def _NaturalSortKey(text):
  """Build a sort key under which runs of digits in text compare as numbers."""
  # re.split with a capturing group keeps the digit runs, and they always
  # land on the odd indices, so strings and ints never share a position.
  return [
      int(chunk) if index % 2 else chunk
      for index, chunk in enumerate(re.split(r'(\d+)', text))
  ]


def FindArchiveImage(chromeos_root, build, build_id):
  """Returns name of the trybot artifact for board/build_id.

  Runs 'gsutil ls' inside the chroot to list the test images archived
  for trybot 'build' whose directory name ends in 'b<build_id>'.

  Args:
    chromeos_root: path to the ChromeOS root whose chroot runs gsutil.
    build: trybot build type, such as 'daisy-release'.
    build_id: build number of the trybot job.

  Returns:
    The artifact name (something like
    'trybot-daisy-release/R40-6394.0.0-b1389'), or '' if no archived image
    was found.
  """
  ce = command_executer.GetCommandExecuter()
  command = ('gsutil ls gs://chromeos-image-archive/trybot-%s/*b%s'
             '/chromiumos_test_image.tar.xz' % (build, build_id))
  _, out, _ = ce.ChrootRunCommandWOutput(
      chromeos_root, command, print_to_console=False)
  #
  # If build_id is not unique, there may be multiple archive images
  # to choose from.  In that case out will look something like this:
  #
  # 'gs://.../R35-5692.0.0-b105/chromiumos_test_image.tar.xz
  #  gs://.../R46-7339.0.0-b105/chromiumos_test_image.tar.xz'
  #
  # After stripping the final '\n' and splitting on any other '\n', we get
  # one candidate per list element.
  #
  candidates = out.rstrip('\n').split('\n')
  # Pick the newest candidate.  Version numbers must be compared
  # numerically: a plain string comparison would rank 'R99-...' above
  # 'R100-...'.
  newest = max(candidates, key=_NaturalSortKey)

  trybot_name = 'trybot-%s' % build
  start_pos = newest.find(trybot_name)
  end_pos = newest.find('/chromiumos_test_image')
  if start_pos < 0 or end_pos < 0:
    # Nothing was listed (or the output is not an archive path); slicing
    # with -1 would produce a bogus name.
    return ''

  return newest[start_pos:end_pos]
187
188
189def GetTrybotImage(chromeos_root,
190                   buildbot_name,
191                   patch_list,
192                   build_tag,
193                   other_flags=None,
194                   build_toolchain=False,
195                   async=False):
196  """Launch buildbot and get resulting trybot artifact name.
197
198  This function launches a buildbot with the appropriate flags to
199  build the test ChromeOS image, with the current ToT mobile compiler.  It
200  checks every 10 minutes to see if the trybot has finished.  When the trybot
201  has finished, it parses the resulting report logs to find the trybot
202  artifact (if one was created), and returns that artifact name.
203
204  chromeos_root is the path to the ChromeOS root, needed for finding chromite
205  and launching the buildbot.
206
207  buildbot_name is the name of the buildbot queue, such as lumpy-release or
208  daisy-paladin.
209
210  patch_list a python list of the patches, if any, for the buildbot to use.
211
212  build_tag is a (unique) string to be used to look up the buildbot results
213  from among all the build records.
214  """
215  ce = command_executer.GetCommandExecuter()
216  cbuildbot_path = os.path.join(chromeos_root, 'chromite/cbuildbot')
217  base_dir = os.getcwd()
218  patch_arg = ''
219  if patch_list:
220    for p in patch_list:
221      patch_arg = patch_arg + ' -g ' + repr(p)
222  toolchain_flags = ''
223  if build_toolchain:
224    toolchain_flags += '--latest-toolchain'
225  os.chdir(cbuildbot_path)
226  if other_flags:
227    optional_flags = ' '.join(other_flags)
228  else:
229    optional_flags = ''
230
231  # Launch buildbot with appropriate flags.
232  build = buildbot_name
233  description = build_tag
234  command_prefix = ''
235  if not patch_arg:
236    command_prefix = 'yes | '
237  command = ('%s ./cbuildbot --remote --nochromesdk %s'
238             ' --remote-description=%s %s %s %s' % (command_prefix,
239                                                    optional_flags, description,
240                                                    toolchain_flags, patch_arg,
241                                                    build))
242  _, out, _ = ce.RunCommandWOutput(command)
243  if 'Tryjob submitted!' not in out:
244    logger.GetLogger().LogFatal('Error occurred while launching trybot job: '
245                                '%s' % command)
246
247  os.chdir(base_dir)
248
249  build_id = 0
250  build_status = None
251  # Wait for  buildbot to finish running (check every 10 minutes).  Wait
252  # 10 minutes before the first check to give the buildbot time to launch
253  # (so we don't start looking for build data before it's out there).
254  time.sleep(SLEEP_TIME)
255  done = False
256  pending = True
257  # pending_time is the time between when we submit the job and when the
258  # buildbot actually launches the build.  running_time is the time between
259  # when the buildbot job launches and when it finishes.  The job is
260  # considered 'pending' until we can find an entry for it in the buildbot
261  # logs.
262  pending_time = SLEEP_TIME
263  running_time = 0
264  long_slept = False
265  while not done:
266    done = True
267    build_info = GetBuildInfo(base_dir, build)
268    if not build_info:
269      if pending_time > TIME_OUT:
270        logger.GetLogger().LogFatal('Unable to get build logs for target %s.' %
271                                    build)
272      else:
273        pending_message = 'Unable to find build log; job may be pending.'
274        done = False
275
276    if done:
277      data_dict = FindBuildRecordFromLog(description, build_info)
278      if not data_dict:
279        # Trybot job may be pending (not actually launched yet).
280        if pending_time > TIME_OUT:
281          logger.GetLogger().LogFatal('Unable to find build record for trybot'
282                                      ' %s.' % description)
283        else:
284          pending_message = 'Unable to find build record; job may be pending.'
285          done = False
286
287      else:
288        # Now that we have actually found the entry for the build
289        # job in the build log, we know the job is actually
290        # runnning, not pending, so we flip the 'pending' flag.  We
291        # still have to wait for the buildbot job to finish running
292        # however.
293        pending = False
294        build_id = data_dict['number']
295
296        if async:
297           # Do not wait for trybot job to finish; return immediately
298          return build_id, " "
299
300        if not long_slept:
301          # The trybot generally takes more than 2 hours to finish.
302          # Wait two hours before polling the status.
303          long_slept = True
304          time.sleep(INITIAL_SLEEP_TIME)
305          pending_time += INITIAL_SLEEP_TIME
306        if True == data_dict['finished']:
307          build_status = data_dict['results']
308        else:
309          done = False
310
311    if not done:
312      if pending:
313        logger.GetLogger().LogOutput(pending_message)
314        logger.GetLogger().LogOutput('Current pending time: %d minutes.' %
315                                     (pending_time / 60))
316        pending_time += SLEEP_TIME
317      else:
318        logger.GetLogger().LogOutput('{0} minutes passed.'.format(running_time /
319                                                                  60))
320        logger.GetLogger().LogOutput('Sleeping {0} seconds.'.format(SLEEP_TIME))
321        running_time += SLEEP_TIME
322
323      time.sleep(SLEEP_TIME)
324      if running_time > TIME_OUT:
325        done = True
326
327  trybot_image = ''
328
329  if build.endswith('-toolchain'):
330    # For rotating testers, we don't care about their build_status
331    # result, because if any HWTest failed it will be non-zero.
332    trybot_image = FindArchiveImage(chromeos_root, build, build_id)
333  else:
334    # The nightly performance tests do not run HWTests, so if
335    # their build_status is non-zero, we do care.  In this case
336    # non-zero means the image itself probably did not build.
337    if build_status in OK_STATUS:
338      trybot_image = FindArchiveImage(chromeos_root, build, build_id)
339  if not trybot_image:
340    logger.GetLogger().LogError('Trybot job %s failed with status %d;'
341                                ' no trybot image generated.' %
342                                (description, build_status))
343
344  logger.GetLogger().LogOutput("trybot_image is '%s'" % trybot_image)
345  logger.GetLogger().LogOutput('build_status is %d' % build_status)
346  return build_id, trybot_image
347
348
def GetGSContent(chromeos_root, path):
  """Return the output of 'gsutil cat' for a path in the image archive.

  'path' is taken relative to gs://chromeos-image-archive/ and the command
  is run inside the chroot of 'chromeos_root', without echoing to the
  console.
  """
  cat_command = 'gsutil cat gs://chromeos-image-archive/%s' % path
  executer = command_executer.GetCommandExecuter()
  _, contents, _ = executer.ChrootRunCommandWOutput(
      chromeos_root, cat_command, print_to_console=False)
  return contents
357
358
def DoesImageExist(chromeos_root, build):
  """Tell whether the test image of the given build is in the archive.

  Runs 'gsutil ls' on the build's chromiumos_test_image.tar.xz inside the
  chroot of 'chromeos_root'.  Returns True when the command's return value
  is falsy (i.e. zero), False otherwise.
  """
  ls_command = ('gsutil ls gs://chromeos-image-archive/%s'
                '/chromiumos_test_image.tar.xz') % build
  executer = command_executer.GetCommandExecuter()
  status = executer.ChrootRunCommand(
      chromeos_root, ls_command, print_to_console=False)
  return not status
367
368
def WaitForImage(chromeos_root, build):
  """Block until the image for 'build' exists in the archive.

  Checks for the image every SLEEP_TIME seconds and returns (None) as soon
  as it is found.

  Raises:
    BuildbotTimeout: the image did not show up within TIME_OUT seconds.
  """
  seconds_waited = 0
  while seconds_waited < TIME_OUT:
    image_ready = DoesImageExist(chromeos_root, build)
    if image_ready:
      return
    not_ready = 'Image %s not ready, waiting for 10 minutes' % build
    logger.GetLogger().LogOutput(not_ready)
    time.sleep(SLEEP_TIME)
    seconds_waited += SLEEP_TIME

  not_found = ('Image %s not found, waited for %d hours' %
               (build, (TIME_OUT / 3600)))
  logger.GetLogger().LogOutput(not_found)
  raise BuildbotTimeout('Timeout while waiting for image %s' % build)
384