site_utils.py revision 3197b39f82eb92afff33c7d44b805afe120c7627
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import logging
import os
import re
import signal
import socket
import time
import urllib2

from autotest_lib.client.common_lib import base_utils, error, global_config
from autotest_lib.client.cros import constants


# Time (in seconds) nuke_pids() sleeps after sending a signal, to give the
# signalled processes a chance to exit.
CHECK_PID_IS_ALIVE_TIMEOUT = 6

_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')

# Values of 'general_state' in the lab status JSON that mean the lab is usable.
LAB_GOOD_STATES = ('open', 'throttled')


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""


def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest num.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0'

    @return a 4-tuple of strings (board, type, milestone, manifest):
            board: board the manifest is for, e.g. x86-alex.
            type: one of 'release', 'factory', or 'firmware'
            milestone: (numeric) milestone the manifest was associated with.
            manifest: manifest number, e.g. '2015.0.0'

    @raises ParseBuildNameException: if |name| does not match the expected
            '<board>-<type>/R<milestone>-<manifest>' layout.
    """
    match = re.match(r'([\w-]+)-(\w+)/R(\d+)-([\d.ab-]+)', name)
    # The pattern has exactly four groups, so a successful match always
    # yields the four components.
    if match:
        return match.groups()
    raise ParseBuildNameException('%s is a malformed build name.' % name)


def ping(host, deadline=None, tries=None, timeout=60):
    """Attempt to ping |host|.

    Shell out to 'ping' to try to reach |host| for |timeout| seconds.
    Returns exit code of ping.

    Per 'man ping', if you specify BOTH |deadline| and |tries|, ping only
    returns 0 if we get responses to |tries| pings within |deadline| seconds.

    Specifying |deadline| or |tries| alone should return 0 as long as
    some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed.
    @param tries: number of pings to send.
    @param timeout: number of seconds after which to kill 'ping' command.
    @return exit code of ping command.
    """
    args = [host]
    if deadline:
        args.append('-w%d' % deadline)
    if tries:
        args.append('-c%d' % tries)
    # ignore_status=True: a failed ping is reported through the returned exit
    # code instead of an exception.
    return base_utils.run('ping', args=args,
                          ignore_status=True, timeout=timeout,
                          stdout_tee=base_utils.TEE_TO_LOGS,
                          stderr_tee=base_utils.TEE_TO_LOGS).exit_status


def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured dns_zone and resolved.

    @param hostname: The hostname to check.
    @returns True if hostname.dns_zone resolves, otherwise False.
    """
    host_parts = hostname.split('.')
    dns_zone = global_config.global_config.get_config_value('CROS', 'dns_zone',
                                                            default=None)
    fqdn = '%s.%s' % (host_parts[0], dns_zone)
    try:
        socket.gethostbyname(fqdn)
        return True
    except socket.gaierror:
        return False


def get_chrome_version(job_views):
    """
    Retrieves the version of the chrome binary associated with a job.

    When a test runs we query the chrome binary for its version and drop
    that value into a client keyval. To retrieve the chrome version we get all
    the views associated with a test from the db, including those of the
    server and client jobs, and parse the version out of the first test view
    that has it. If we never ran a single test in the suite the job_views
    dictionary will not contain a chrome version.

    This method cannot retrieve the chrome version from a dictionary that
    does not conform to the structure of an autotest tko view.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
    @return: The chrome version string, or None if one can't be found.
    """

    # Aborted jobs have no views.
    if not job_views:
        return None

    for view in job_views:
        attributes = view.get('attributes')
        if attributes and constants.CHROME_VERSION in attributes:
            return attributes.get(constants.CHROME_VERSION)

    logging.warning('Could not find chrome version for failure.')
    return None


def get_current_board():
    """Return the current board name.

    Reads the CHROMEOS_RELEASE_BOARD entry from /etc/lsb-release.

    @return current board name, e.g "lumpy", None on fail.
    """
    with open('/etc/lsb-release') as lsb_release_file:
        for line in lsb_release_file:
            m = re.match(r'^CHROMEOS_RELEASE_BOARD=(.+)$', line)
            if m:
                return m.group(1)
    return None


# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Upload to GS bucket.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When given, gsutil output is written to a
                       file named 'tracing' inside it.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call needed to confirm that
                        the uploader has permissions to execute the upload.

    @raise CmdError: the exit code of the gsutil call was not 0.

    @returns True if the upload commands ran; False if |acl| is neither a
             canned ACL name nor an existing file.
    """
    # https://developers.google.com/storage/docs/accesscontrol#extension
    CANNED_ACLS = ['project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control']
    _GSUTIL_BIN = 'gsutil'
    acl_cmd = None
    if acl in CANNED_ACLS:
        cmd = '%s cp -a %s %s %s' % (_GSUTIL_BIN, acl, local_file, remote_file)
    else:
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present, if not this script
        # errors
        cmd = '%s cp -a private %s %s' % (_GSUTIL_BIN, local_file, remote_file)
        if not os.path.exists(acl):
            logging.error('Unable to find ACL File %s.', acl)
            return False
        acl_cmd = '%s setacl %s %s' % (_GSUTIL_BIN, acl, remote_file)
    if not result_dir:
        base_utils.run(cmd, timeout=transfer_timeout, verbose=True)
        if acl_cmd:
            base_utils.run(acl_cmd, timeout=acl_timeout, verbose=True)
        return True
    with open(os.path.join(result_dir, 'tracing'), 'w') as ftrace:
        ftrace.write('Preamble\n')
        base_utils.run(cmd, timeout=transfer_timeout, verbose=True,
                       stdout_tee=ftrace, stderr_tee=ftrace)
        if acl_cmd:
            ftrace.write('\nACL setting\n')
            # Apply the passed in ACL xml file to the uploaded object.
            base_utils.run(acl_cmd, timeout=acl_timeout, verbose=True,
                           stdout_tee=ftrace, stderr_tee=ftrace)
        ftrace.write('Postamble\n')
        return True


def gs_ls(uri_pattern):
    """Returns a list of URIs that match a given pattern.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern.

    @raise CmdError: the gsutil command failed.

    """
    gs_cmd = ' '.join(['gsutil', 'ls', uri_pattern])
    result = base_utils.system_output(gs_cmd).splitlines()
    # Drop empty lines and strip trailing whitespace from the rest.
    return [path.rstrip() for path in result if path]


def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pid's, kill them via an escalating series of signals.

    Each signal in |signal_queue| is sent to every pid in turn, followed by a
    CHECK_PID_IS_ALIVE_TIMEOUT second pause before the next signal is tried.

    @param pid_list: List of PID's to kill.
    @param signal_queue: Queue of signals to send the PID's to terminate them.

    @raises error.AutoservRunError: if SIGKILL was not one of the signals
            and some of the pids are still alive afterwards.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)
    # Once SIGKILL has been sent there is nothing further to verify.
    if signal.SIGKILL in signal_queue:
        return
    failed_list = []
    for pid in pid_list:
        if base_utils.pid_is_alive(pid):
            # Both values must be passed to '%' as a single tuple.
            failed_list.append('Could not kill %d for process name: %s.' %
                               (pid, base_utils.get_process_name(pid)))
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)


def externalize_host(host):
    """Returns an externally accessible host name.

    @param host: a host name or address (string)

    @return An externally visible host name or address: the local machine's
            hostname if |host| is 'localhost' or '127.0.0.1', otherwise
            |host| unchanged.

    """
    return socket.gethostname() if host in _LOCAL_HOST_LIST else host


def get_lab_status():
    """Grabs the current lab status and message.

    The status URL is fetched up to 5 times, one second apart. If no attempt
    yields an HTTP 200 response, the lab is reported as up with an empty
    message.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up' : True, 'message' : ''}
    status_url = global_config.global_config.get_config_value('CROS',
                                                              'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                result['lab_is_up'] = data['general_state'] in LAB_GOOD_STATES
                result['message'] = data['message']
                return result
        finally:
            # Always release the connection, whether or not we return.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result


def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
                                           disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    if not (global_config.global_config.get_config_value('SERVER',
            'hostname').startswith('cautotest')):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    boards_are_disabled = re.search(r'\[(.*)\]', lab_status['message'])
    if board and boards_are_disabled:
        # Compare against whole board names; a plain substring test would
        # treat e.g. 'alex' as disabled whenever 'x86-alex' is listed.
        disabled_boards = [name for name in
                           re.split(r'[,\s]+', boards_are_disabled.group(1))
                           if name]
        if board in disabled_boards:
            raise error.BoardIsDisabledException('Chromium OS Lab is '
                    'currently not allowing suites to be scheduled on board '
                    '%s: %s' % (board, lab_status['message']))


def urlopen_socket_timeout(url, data=None, timeout=5):
    """
    Wrapper to urllib2.urlopen with a socket timeout.

    This method will convert all socket timeouts to
    TimeoutExceptions, so we can use it in conjunction
    with the rpc retry decorator and continue to handle
    other URLErrors as we see fit.

    The process-wide default socket timeout is changed for the duration of
    the call and restored afterwards.

    @param url: The url to open.
    @param data: The data to send to the url (eg: the urlencoded dictionary
                 used with a POST call).
    @param timeout: The timeout for this urlopen call.

    @return: The response of the urlopen call.

    @raises: error.TimeoutException when a socket timeout occurs.
    @raises: urllib2.URLError for any other failure to open the url.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(url, data=data)
    except urllib2.URLError as e:
        # getattr: not every URLError subclass is guaranteed to carry
        # a 'reason' attribute.
        if isinstance(getattr(e, 'reason', None), socket.timeout):
            raise error.TimeoutException(str(e))
        # Not a timeout: propagate to the caller instead of silently
        # returning None.
        raise
    finally:
        socket.setdefaulttimeout(old_timeout)