site_utils.py revision 6c00dde741fb42fed2d8bb25c30990557b288097
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4import json
5import logging
6import os
7import re
8import signal
9import socket
10import time
11import urllib2
12
13from autotest_lib.client.common_lib import base_utils, error, global_config
14from autotest_lib.client.cros import constants
15
16
# Number of seconds nuke_pids() sleeps after delivering each signal, giving
# the targeted processes time to exit before the next signal is sent.
CHECK_PID_IS_ALIVE_TIMEOUT = 6

# Host names/addresses that externalize_host() treats as the local machine.
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# 'general_state' values for which get_lab_status() reports the lab as up.
LAB_GOOD_STATES = ('open', 'throttled')
23
24
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it could not parse."""
28
29
def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest num.

    The name must begin with text of the form
    <board>-<type>/R<milestone>-<manifest>; all pieces are returned as
    strings, exactly as they appear in |name|.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0'
    @return a 4-tuple (board, type, milestone, manifest), e.g.
            ('x86-alex', 'release', '20', '2015.0.0').
    @raise ParseBuildNameException: |name| does not have the expected form.
    """
    parsed = re.match(r'([\w-]+)-(\w+)/R(\d+)-([\d.ab-]+)', name)
    if not parsed:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # The pattern has exactly four groups, one per returned field.
    return parsed.groups()
43
44
def ping(host, deadline=None, tries=None, timeout=60):
    """Attempt to ping |host|.

    Runs the 'ping' command against |host|, killing it after |timeout|
    seconds, and hands back its exit code.

    Per 'man ping', if you specify BOTH |deadline| and |tries|, ping only
    returns 0 if we get responses to |tries| pings within |deadline| seconds.

    Specifying |deadline| or |tries| alone should return 0 as long as
    some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed; passed
                     to ping as -w. Omitted when None or 0.
    @param tries: number of pings to send; passed to ping as -c. Omitted
                  when None or 0.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    ping_args = [host]
    # Each option is only added when the caller supplied a truthy value.
    for option_format, value in (('-w%d', deadline), ('-c%d', tries)):
        if value:
            ping_args.append(option_format % value)
    outcome = base_utils.run('ping', args=ping_args,
                             ignore_status=True, timeout=timeout,
                             stdout_tee=base_utils.TEE_TO_LOGS,
                             stderr_tee=base_utils.TEE_TO_LOGS)
    return outcome.exit_status
72
73
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the part of |hostname| before its first '.' is used; it is joined
    to the 'dns_zone' value from the CROS section of the global config and
    the resulting name is looked up.

    @param hostname: The hostname to check.
    @returns True if <short hostname>.<dns_zone> resolves, otherwise False.
    """
    short_name = hostname.split('.')[0]
    zone = global_config.global_config.get_config_value(
            'CROS', 'dns_zone', default=None)
    candidate = '%s.%s' % (short_name, zone)
    try:
        socket.gethostbyname(candidate)
    except socket.gaierror:
        return False
    return True
89
90
def get_chrome_version(job_views):
    """
    Retrieves the version of the chrome binary associated with a job.

    Scans the given views in order and returns the value stored under
    constants.CHROME_VERSION in the 'attributes' of the first view that has
    that key. Views with missing or empty 'attributes' are skipped.

    This method cannot retrieve the chrome version from a dictionary that
    does not conform to the structure of an autotest tko view.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
    @return: The chrome version string, or None if one can't be found.
    """
    # Aborted jobs have no views.
    if not job_views:
        return None

    for view in job_views:
        attributes = view.get('attributes')
        if not attributes:
            continue
        if constants.CHROME_VERSION in attributes:
            return attributes.get(constants.CHROME_VERSION)

    logging.warning('Could not find chrome version for failure.')
    return None
122
123
def get_current_board():
    """Return the current board name.

    The name is read from the CHROMEOS_RELEASE_BOARD entry of
    /etc/lsb-release.

    @return current board name, e.g "lumpy"; None if /etc/lsb-release cannot
            be opened or read, or has no CHROMEOS_RELEASE_BOARD entry.
    """
    try:
        with open('/etc/lsb-release') as lsb_release_file:
            for line in lsb_release_file:
                # '.' never matches the trailing newline, so the captured
                # value is the bare board name.
                m = re.match(r'^CHROMEOS_RELEASE_BOARD=(.+)$', line)
                if m:
                    return m.group(1)
    except IOError:
        # Honor the documented "None on fail" contract when the file is
        # missing or unreadable instead of propagating the error.
        return None
    return None
135
136
137# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
138# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Upload to GS bucket.

    When |acl| is one of gsutil's canned ACL names the file is copied with
    that ACL directly. Otherwise |acl| is treated as the path to an ACL
    file: the file is copied as 'private' and the ACL file is then applied
    with 'gsutil setacl'.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When given, the output of the gsutil calls is
                       written to a file named 'tracing' inside it.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call needed to confirm that
                        the uploader has permissions to execute the upload.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns False if |acl| is neither a canned ACL name nor an existing
             file (nothing is uploaded in that case); True once the gsutil
             command(s) have been run.
    """
    gsutil = 'gsutil'
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        setacl_cmd = None
    else:
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present, if not this script
        # errors
        if not os.path.exists(acl):
            logging.error('Unable to find ACL File %s.', acl)
            return False
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        setacl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)

    if result_dir:
        trace_path = os.path.join(result_dir, 'tracing')
        with open(trace_path, 'w') as trace_file:
            trace_file.write('Preamble\n')
            base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                           stdout_tee=trace_file, stderr_tee=trace_file)
            if setacl_cmd is not None:
                trace_file.write('\nACL setting\n')
                # Apply the passed in ACL xml file to the uploaded object.
                base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True,
                               stdout_tee=trace_file, stderr_tee=trace_file)
            trace_file.write('Postamble\n')
            return True

    base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
    if setacl_cmd is not None:
        base_utils.run(setacl_cmd, timeout=acl_timeout, verbose=True)
    return True
190
191
def gs_ls(uri_pattern):
    """Returns a list of URIs that match a given pattern.

    Runs 'gsutil ls' on the pattern and returns its non-empty output lines
    with trailing whitespace removed.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern.

    @raise CmdError: the gsutil command failed.

    """
    gs_cmd = ' '.join(('gsutil', 'ls', uri_pattern))
    matches = []
    for line in base_utils.system_output(gs_cmd).splitlines():
        if line:
            matches.append(line.rstrip())
    return matches
205
206
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Each signal in |signal_queue| is sent, in order, to every pid; after
    each round this sleeps for CHECK_PID_IS_ALIVE_TIMEOUT seconds before
    moving on. Surviving processes are only looked for when SIGKILL was not
    one of the signals sent.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Sequence of signals to send the PID's to terminate
                         them.

    @raise error.AutoservRunError: SIGKILL was not in |signal_queue| and at
                                   least one pid is still alive afterwards.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)

    # Nothing further to verify once SIGKILL has been delivered.
    if signal.SIGKILL in signal_queue:
        return

    # Both values must be passed to '%' as one tuple; formatting with the
    # pid alone raises TypeError because the string has two placeholders.
    failed_list = [
            'Could not kill %d for process name: %s.' %
            (pid, base_utils.get_process_name(pid))
            for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
235
236
def externalize_host(host):
    """Returns an externally accessible host name.

    Names listed in _LOCAL_HOST_LIST are replaced with this machine's own
    host name; anything else is returned unchanged.

    @param host: a host name or address (string)

    @return An externally visible host name or address

    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
246
247
def get_lab_status():
    """Grabs the current lab status and message.

    Fetches the URL configured as CROS.lab_status_url, making up to 5
    attempts and pausing 1 second after each unsuccessful one. The response
    body is expected to be JSON with 'general_state' and 'message' keys. If
    no attempt yields a usable status the lab is reported as up with an
    empty message.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up': True, 'message': ''}
    status_url = global_config.global_config.get_config_value(
            'CROS', 'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                # Read both fields before touching |result| so a missing key
                # cannot leave it half-updated.
                lab_is_up = data['general_state'] in LAB_GOOD_STATES
                message = data['message']
                result['lab_is_up'] = lab_is_up
                result['message'] = message
                return result
        except (ValueError, KeyError, TypeError) as e:
            # A malformed status body is treated like any other failed
            # attempt rather than crashing the caller.
            logging.debug('Could not parse the lab status from %s: %s.',
                          status_url, e)
        finally:
            # Always release the connection, whatever the outcome.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
277
278
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    Nothing is checked unless the SERVER.hostname config value starts with
    'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
                                           disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    if not (global_config.global_config.get_config_value('SERVER',
            'hostname').startswith('cautotest')):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    if not board:
        return

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    # Only the first bracketed section is the board list.
    boards_are_disabled = re.search(r'\[([^\]]*)\]', lab_status['message'])
    if not boards_are_disabled:
        return

    # Compare against whole board names; a substring test on the raw text
    # would wrongly treat e.g. 'alex' as disabled when 'x86-alex' is listed.
    disabled_boards = [name.strip()
                       for name in boards_are_disabled.group(1).split(',')]
    if board in disabled_boards:
        raise error.BoardIsDisabledException('Chromium OS Lab is '
                'currently not allowing suites to be scheduled on board '
                '%s: %s' % (board, lab_status['message']))
315
316
def urlopen_socket_timeout(url, data=None, timeout=5):
    """
    Wrapper to urllib2.urlopen with a socket timeout.

    This method will convert all socket timeouts to
    TimeoutExceptions, so we can use it in conjunction
    with the rpc retry decorator and continue to handle
    other URLErrors as we see fit.

    The timeout is applied by temporarily changing the process-wide default
    socket timeout, which is restored before returning or raising.

    @param url: The url to open.
    @param data: The data to send to the url (eg: the urlencoded dictionary
                 used with a POST call).
    @param timeout: The timeout for this urlopen call.

    @return: The response of the urlopen call.

    @raises: error.TimeoutException when a socket timeout occurs.
             urllib2.URLError for errors that not caused by timeout.
             urllib2.HTTPError for errors like 404 url not found.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(url, data=data)
    except socket.timeout as e:
        # A timeout may surface directly rather than wrapped in a URLError;
        # convert it as well.
        raise error.TimeoutException(str(e))
    except urllib2.URLError as e:
        # Not every URLError subclass instance is guaranteed to carry a
        # 'reason' attribute, so look it up defensively.
        if isinstance(getattr(e, 'reason', None), socket.timeout):
            raise error.TimeoutException(str(e))
        raise
    finally:
        socket.setdefaulttimeout(old_timeout)
347