dev_server.py revision 58424779a23e853a64562a66ccbbcbbeea3d1209
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from distutils import version
import cStringIO
import HTMLParser
import httplib
import json
import logging
import multiprocessing
import os
import re
import socket
import time
import urllib2
import urlparse

from autotest_lib.client.bin import utils as site_utils
from autotest_lib.client.common_lib import android_utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.graphite import autotest_stats
# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107


# Shared handle on the global autotest configuration.
CONFIG = global_config.global_config

# Path of a file generated at build time. It records, per suite and per test,
# the DEPENDENCIES list found in each control file, as a dict of dicts:
# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']},
#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']},
#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
# }
DEPENDENCIES_FILE = 'test_suites/dependency_info'

# Seconds between two consecutive polls of the devserver's is_staged call
# while waiting for artifacts to be staged.
_ARTIFACT_STAGE_POLLING_INTERVAL = 5

# Artifacts staged when a client asks the devserver to stage an image.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'

# Artifacts staged when a client asks the devserver to stage an image together
# with the autotest artifacts.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ','.join((
        'full_payload',
        'test_suites',
        'control_files',
        'stateful',
        'autotest_packages',
))

# Artifacts staged when a client asks the devserver to stage an Android
# build.
_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'zip_images,vendor_partitions'

SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
        'CROS', 'skip_devserver_health_check', type=bool)

# Seconds before the call that fetches a devserver's load times out.
TIMEOUT_GET_DEVSERVER_LOAD = 2.0

# Android artifact path in devserver (the configured value with every
# backslash removed).
ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')

# Value returned by a devserver RPC to signal that the call succeeded.
SUCCESS = 'Success'

# Timeout, in minutes, of a single devserver ssh call.
DEVSERVER_SSH_TIMEOUT_MINS = 1

# Timeout, in minutes, spent waiting for a devserver to finish staging.
DEVSERVER_IS_STAGING_RETRY_MIN = 100

# Timeout, in minutes, spent waiting for a DUT auto-update to finish.
DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN = 100

# Total number of times the devserver may trigger a CrOS auto-update.
AU_RETRY_LIMIT = 2

# Seconds between two consecutive polls of the devserver's get_au_status call
# while checking whether a CrOS auto-update has finished.
CROS_AU_POLLING_INTERVAL = 10

# Number of seconds for intervals between retrying auto-update calls.
81CROS_AU_RETRY_INTERVAL = 20 82 83PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value( 84 'CROS', 'prefer_local_devserver', type=bool, default=False) 85 86ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value( 87 'CROS', 'enable_ssh_connection_for_devserver', type=bool, 88 default=False) 89 90# Directory to save auto-update logs 91AUTO_UPDATE_LOG_DIR = 'autoupdate_logs' 92 93DEFAULT_SUBNET_MASKBIT = 19 94 95_timer = autotest_stats.Timer('devserver') 96 97 98class DevServerException(Exception): 99 """Raised when the dev server returns a non-200 HTTP response.""" 100 pass 101 102 103class MarkupStripper(HTMLParser.HTMLParser): 104 """HTML parser that strips HTML tags, coded characters like & 105 106 Works by, basically, not doing anything for any tags, and only recording 107 the content of text nodes in an internal data structure. 108 """ 109 def __init__(self): 110 self.reset() 111 self.fed = [] 112 113 114 def handle_data(self, d): 115 """Consume content of text nodes, store it away.""" 116 self.fed.append(d) 117 118 119 def get_data(self): 120 """Concatenate and return all stored data.""" 121 return ''.join(self.fed) 122 123 124def _strip_http_message(message): 125 """Strip the HTTP marker from the an HTTP message. 126 127 @param message: A string returned by an HTTP call. 128 129 @return: A string with HTTP marker being stripped. 130 """ 131 strip = MarkupStripper() 132 try: 133 strip.feed(message.decode('utf_32')) 134 except UnicodeDecodeError: 135 strip.feed(message) 136 return strip.get_data() 137 138 139def _get_image_storage_server(): 140 return CONFIG.get_config_value('CROS', 'image_storage_server', type=str) 141 142 143def _get_canary_channel_server(): 144 """ 145 Get the url of the canary-channel server, 146 eg: gsutil://chromeos-releases/canary-channel/<board>/<release> 147 148 @return: The url to the canary channel server. 
149 """ 150 return CONFIG.get_config_value('CROS', 'canary_channel_server', type=str) 151 152 153def _get_storage_server_for_artifacts(artifacts=None): 154 """Gets the appropriate storage server for the given artifacts. 155 156 @param artifacts: A list of artifacts we need to stage. 157 @return: The address of the storage server that has these artifacts. 158 The default image storage server if no artifacts are specified. 159 """ 160 factory_artifact = global_config.global_config.get_config_value( 161 'CROS', 'factory_artifact', type=str, default='') 162 if artifacts and factory_artifact and factory_artifact in artifacts: 163 return _get_canary_channel_server() 164 return _get_image_storage_server() 165 166 167def _get_dev_server_list(): 168 return CONFIG.get_config_value('CROS', 'dev_server', type=list, default=[]) 169 170 171def _get_crash_server_list(): 172 return CONFIG.get_config_value('CROS', 'crash_server', type=list, 173 default=[]) 174 175 176def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN, 177 exception_to_raise=DevServerException): 178 """A decorator to use with remote devserver calls. 179 180 This decorator converts urllib2.HTTPErrors into DevServerExceptions 181 with any embedded error info converted into plain text. The method 182 retries on urllib2.URLError or error.CmdError to avoid devserver flakiness. 183 """ 184 #pylint: disable=C0111 185 def inner_decorator(method): 186 187 @retry.retry((urllib2.URLError, error.CmdError), 188 timeout_min=timeout_min, 189 exception_to_raise=exception_to_raise) 190 def wrapper(*args, **kwargs): 191 """This wrapper actually catches the HTTPError.""" 192 try: 193 return method(*args, **kwargs) 194 except urllib2.HTTPError as e: 195 error_markup = e.read() 196 raise DevServerException(_strip_http_message(error_markup)) 197 198 return wrapper 199 200 return inner_decorator 201 202 203class DevServer(object): 204 """Base class for all DevServer-like server stubs. 
205 206 This is the base class for interacting with all Dev Server-like servers. 207 A caller should instantiate a sub-class of DevServer with: 208 209 host = SubClassServer.resolve(build) 210 server = SubClassServer(host) 211 """ 212 _MIN_FREE_DISK_SPACE_GB = 20 213 _MAX_APACHE_CLIENT_COUNT = 75 214 # Threshold for the CPU load percentage for a devserver to be selected. 215 MAX_CPU_LOAD = 80.0 216 # Threshold for the network IO, set to 80MB/s 217 MAX_NETWORK_IO = 1024 * 1024 * 80 218 DISK_IO = 'disk_total_bytes_per_second' 219 NETWORK_IO = 'network_total_bytes_per_second' 220 CPU_LOAD = 'cpu_percent' 221 FREE_DISK = 'free_disk' 222 STAGING_THREAD_COUNT = 'staging_thread_count' 223 APACHE_CLIENT_COUNT = 'apache_client_count' 224 225 226 def __init__(self, devserver): 227 self._devserver = devserver 228 229 230 def url(self): 231 """Returns the url for this devserver.""" 232 return self._devserver 233 234 235 @staticmethod 236 def get_server_name(url): 237 """Strip the http:// prefix and port from a url. 238 239 @param url: A url of a server. 240 241 @return the server name without http:// prefix and port. 242 243 """ 244 return urlparse.urlparse(url).hostname 245 246 247 @staticmethod 248 def get_server_url(url): 249 """Get the devserver url from a repo url, which includes build info. 250 251 @param url: A job repo url. 252 253 @return A devserver url, e.g., http://127.0.0.10:8080 254 """ 255 res = urlparse.urlparse(url) 256 if res.netloc: 257 return res.scheme + '://' + res.netloc 258 259 260 @classmethod 261 def get_devserver_load_wrapper(cls, devserver, timeout_sec, output): 262 """A wrapper function to call get_devserver_load in parallel. 263 264 @param devserver: url of the devserver. 265 @param timeout_sec: Number of seconds before time out the devserver 266 call. 267 @param output: An output queue to save results to. 
268 """ 269 load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0) 270 if load: 271 load['devserver'] = devserver 272 output.put(load) 273 274 275 @classmethod 276 def get_devserver_load(cls, devserver, 277 timeout_min=DEVSERVER_SSH_TIMEOUT_MINS): 278 """Returns True if the |devserver| is healthy to stage build. 279 280 @param devserver: url of the devserver. 281 @param timeout_min: How long to wait in minutes before deciding the 282 the devserver is not up (float). 283 284 @return: A dictionary of the devserver's load. 285 286 """ 287 server_name = DevServer.get_server_name(devserver) 288 # statsd treats |.| as path separator. 289 server_name = server_name.replace('.', '_') 290 call = DevServer._build_call(devserver, 'check_health') 291 292 @remote_devserver_call(timeout_min=timeout_min) 293 def make_call(): 294 """Inner method that makes the call.""" 295 return cls.run_call(call, timeout=timeout_min*60) 296 try: 297 result_dict = json.load(cStringIO.StringIO(make_call())) 298 for key, val in result_dict.iteritems(): 299 try: 300 autotest_stats.Gauge(server_name).send(key, float(val)) 301 except ValueError: 302 # Ignore all non-numerical health data. 303 pass 304 305 return result_dict 306 except Exception as e: 307 logging.error('Devserver call failed: "%s", timeout: %s seconds,' 308 ' Error: %s', call, timeout_min * 60, e) 309 310 311 @staticmethod 312 def is_free_disk_ok(load): 313 """Check if a devserver has enough free disk. 314 315 @param load: A dict of the load of the devserver. 316 317 @return: True if the devserver has enough free disk or disk check is 318 skipped in global config. 319 320 """ 321 if SKIP_DEVSERVER_HEALTH_CHECK: 322 logging.debug('devserver health check is skipped.') 323 elif load[DevServer.FREE_DISK] < DevServer._MIN_FREE_DISK_SPACE_GB: 324 return False 325 326 return True 327 328 329 @staticmethod 330 def is_apache_client_count_ok(load): 331 """Check if a devserver has enough Apache connections available. 
332 333 Apache server by default has maximum of 150 concurrent connections. If 334 a devserver has too many live connections, it likely indicates the 335 server is busy handling many long running download requests, e.g., 336 downloading stateful partitions. It is better not to add more requests 337 to it. 338 339 @param load: A dict of the load of the devserver. 340 341 @return: True if the devserver has enough Apache connections available, 342 or disk check is skipped in global config. 343 344 """ 345 if SKIP_DEVSERVER_HEALTH_CHECK: 346 logging.debug('devserver health check is skipped.') 347 elif DevServer.APACHE_CLIENT_COUNT not in load: 348 logging.debug('Apache client count is not collected from devserver.') 349 elif (load[DevServer.APACHE_CLIENT_COUNT] > 350 DevServer._MAX_APACHE_CLIENT_COUNT): 351 return False 352 353 return True 354 355 356 @classmethod 357 @_timer.decorate 358 def devserver_healthy(cls, devserver, 359 timeout_min=DEVSERVER_SSH_TIMEOUT_MINS): 360 """Returns True if the |devserver| is healthy to stage build. 361 362 @param devserver: url of the devserver. 363 @param timeout_min: How long to wait in minutes before deciding the 364 the devserver is not up (float). 365 366 @return: True if devserver is healthy. Return False otherwise. 367 368 """ 369 server_name = DevServer.get_server_name(devserver) 370 # statsd treats |.| as path separator. 371 server_name = server_name.replace('.', '_') 372 load = cls.get_devserver_load(devserver, timeout_min=timeout_min) 373 if not load: 374 # Failed to get the load of devserver. 375 autotest_stats.Counter(server_name + 376 '.devserver_not_healthy').increment() 377 return False 378 379 apache_ok = DevServer.is_apache_client_count_ok(load) 380 if not apache_ok: 381 logging.error('Devserver check_health failed. 
Live Apache client ' 382 'count is too high: %d.', 383 load[DevServer.APACHE_CLIENT_COUNT]) 384 autotest_stats.Counter(server_name + 385 '.devserver_not_healthy').increment() 386 return False 387 388 disk_ok = DevServer.is_free_disk_ok(load) 389 if not disk_ok: 390 logging.error('Devserver check_health failed. Free disk space is ' 391 'low. Only %dGB is available.', 392 load[DevServer.FREE_DISK]) 393 counter = '.devserver_healthy' if disk_ok else '.devserver_not_healthy' 394 # This counter indicates the load of a devserver. By comparing the 395 # value of this counter for all devservers, we can evaluate the 396 # load balancing across all devservers. 397 autotest_stats.Counter(server_name + counter).increment() 398 return disk_ok 399 400 401 @staticmethod 402 def _build_call(host, method, **kwargs): 403 """Build a URL to |host| that calls |method|, passing |kwargs|. 404 405 Builds a URL that calls |method| on the dev server defined by |host|, 406 passing a set of key/value pairs built from the dict |kwargs|. 407 408 @param host: a string that is the host basename e.g. http://server:90. 409 @param method: the dev server method to call. 410 @param kwargs: a dict mapping arg names to arg values. 411 @return the URL string. 412 """ 413 argstr = '&'.join(map(lambda x: "%s=%s" % x, kwargs.iteritems())) 414 return "%(host)s/%(method)s?%(argstr)s" % dict( 415 host=host, method=method, argstr=argstr) 416 417 418 def build_call(self, method, **kwargs): 419 """Builds a devserver RPC string that is used by 'run_call()'. 420 421 @param method: remote devserver method to call. 422 """ 423 return self._build_call(self._devserver, method, **kwargs) 424 425 426 @classmethod 427 def build_all_calls(cls, method, **kwargs): 428 """Builds a list of URLs that makes RPC calls on all devservers. 429 430 Build a URL that calls |method| on the dev server, passing a set 431 of key/value pairs built from the dict |kwargs|. 432 433 @param method: the dev server method to call. 
434 @param kwargs: a dict mapping arg names to arg values 435 436 @return the URL string 437 """ 438 calls = [] 439 # Note we use cls.servers as servers is class specific. 440 for server in cls.servers(): 441 if cls.devserver_healthy(server): 442 calls.append(cls._build_call(server, method, **kwargs)) 443 444 return calls 445 446 447 @classmethod 448 def run_call(cls, call, readline=False, timeout=None): 449 """Invoke a given devserver call using urllib.open. 450 451 Open the URL with HTTP, and return the text of the response. Exceptions 452 may be raised as for urllib2.urlopen(). 453 454 @param call: a url string that calls a method to a devserver. 455 @param readline: whether read http response line by line. 456 @param timeout: The timeout seconds for this urlopen call. 457 458 @return the results of this call. 459 """ 460 if timeout is not None: 461 return utils.urlopen_socket_timeout( 462 call, timeout=timeout).read() 463 elif readline: 464 response = urllib2.urlopen(call) 465 return [line.rstrip() for line in response] 466 else: 467 return urllib2.urlopen(call).read() 468 469 470 @staticmethod 471 def servers(): 472 """Returns a list of servers that can serve as this type of server.""" 473 raise NotImplementedError() 474 475 476 @classmethod 477 def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT, 478 unrestricted_only=False): 479 """Get the devservers in the same subnet of the given ip. 480 481 @param ip: The IP address of a dut to look for devserver. 482 @param mask_bits: Number of mask bits. Default is 19. 483 @param unrestricted_only: Set to True to select from devserver in 484 unrestricted subnet only. Default is False. 485 486 @return: A list of devservers in the same subnet of the given ip. 487 488 """ 489 # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so 490 # we need a dict to return the full devserver path once the IPs are 491 # filtered in get_servers_in_same_subnet. 
492 server_names = {} 493 all_devservers = [] 494 devservers = (cls.get_unrestricted_devservers() if unrestricted_only 495 else cls.servers()) 496 for server in devservers: 497 server_name = cls.get_server_name(server) 498 server_names[server_name] = server 499 all_devservers.append(server_name) 500 devservers = utils.get_servers_in_same_subnet(ip, mask_bits, 501 all_devservers) 502 return [server_names[s] for s in devservers] 503 504 505 @classmethod 506 def get_unrestricted_devservers( 507 cls, restricted_subnets=utils.RESTRICTED_SUBNETS): 508 """Get the devservers not in any restricted subnet specified in 509 restricted_subnets. 510 511 @param restricted_subnets: A list of restriected subnets. 512 513 @return: A list of devservers not in any restricted subnet. 514 515 """ 516 if not restricted_subnets: 517 return cls.servers() 518 519 devservers = [] 520 for server in cls.servers(): 521 server_name = cls.get_server_name(server) 522 if not utils.get_restricted_subnet(server_name, restricted_subnets): 523 devservers.append(server) 524 return devservers 525 526 527 @classmethod 528 def get_healthy_devserver(cls, build, devservers): 529 """"Get a healthy devserver instance from the list of devservers. 530 531 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 532 533 @return: A DevServer object of a healthy devserver. Return None if no 534 healthy devserver is found. 535 536 """ 537 while devservers: 538 hash_index = hash(build) % len(devservers) 539 devserver = devservers.pop(hash_index) 540 if cls.devserver_healthy(devserver): 541 return cls(devserver) 542 543 544 @classmethod 545 def get_available_devservers(cls, hostname=None, 546 prefer_local_devserver=PREFER_LOCAL_DEVSERVER, 547 restricted_subnets=utils.RESTRICTED_SUBNETS): 548 """Get devservers in the same subnet of the given hostname. 549 550 @param hostname: Hostname of a DUT to choose devserver for. 
551 552 @return: A tuple of (devservers, can_retry), devservers is a list of 553 devservers that's available for the given hostname. can_retry 554 is a flag that indicate if caller can retry the selection of 555 devserver if no devserver in the returned devservers can be 556 used. For example, if hostname is in a restricted subnet, 557 can_retry will be False. 558 """ 559 host_ip = None 560 if hostname: 561 host_ip = site_utils.get_ip_address(hostname) 562 if not host_ip: 563 logging.error('Failed to get IP address of %s. Will pick a ' 564 'devserver without subnet constraint.', hostname) 565 566 if not host_ip: 567 return cls.get_unrestricted_devservers(restricted_subnets), False 568 569 # Go through all restricted subnet settings and check if the DUT is 570 # inside a restricted subnet. If so, only return the devservers in the 571 # restricted subnet and doesn't allow retry. 572 if host_ip and restricted_subnets: 573 for subnet_ip, mask_bits in restricted_subnets: 574 if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits): 575 logging.debug('The host %s (%s) is in a restricted subnet. ' 576 'Try to locate a devserver inside subnet ' 577 '%s:%d.', hostname, host_ip, subnet_ip, 578 mask_bits) 579 devservers = cls.get_devservers_in_same_subnet( 580 subnet_ip, mask_bits) 581 return devservers, False 582 583 # If prefer_local_devserver is set to True and the host is not in 584 # restricted subnet, pick a devserver in the same subnet if possible. 585 # Set can_retry to True so it can pick a different devserver if all 586 # devservers in the same subnet are down. 587 if prefer_local_devserver: 588 return (cls.get_devservers_in_same_subnet( 589 host_ip, DEFAULT_SUBNET_MASKBIT, True), True) 590 591 return cls.get_unrestricted_devservers(restricted_subnets), False 592 593 594 @classmethod 595 def resolve(cls, build, hostname=None): 596 """"Resolves a build to a devserver instance. 597 598 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 
599 @param hostname: The hostname of dut that requests a devserver. It's 600 used to make sure a devserver in the same subnet is 601 preferred. 602 603 @raise DevServerException: If no devserver is available. 604 """ 605 tried_devservers = set() 606 devservers, can_retry = cls.get_available_devservers(hostname) 607 if devservers: 608 tried_devservers |= set(devservers) 609 610 devserver = cls.get_healthy_devserver(build, devservers) 611 612 if not devserver and can_retry: 613 # Find available devservers without dut location constrain. 614 devservers, _ = cls.get_available_devservers() 615 devserver = cls.get_healthy_devserver(build, devservers) 616 if devservers: 617 tried_devservers |= set(devservers) 618 if devserver: 619 return devserver 620 else: 621 error_msg = ('All devservers are currently down: %s. ' 622 'dut hostname: %s' % 623 (tried_devservers, hostname)) 624 logging.error(error_msg) 625 raise DevServerException(error_msg) 626 627 628 @classmethod 629 def random(cls): 630 """Return a random devserver that's available. 631 632 Devserver election in `resolve` method is based on a hash of the 633 build that a caller wants to stage. The purpose is that different 634 callers requesting for the same build can get the same devserver, 635 while the lab is able to distribute different builds across all 636 devservers. That helps to reduce the duplication of builds across 637 all devservers. 638 This function returns a random devserver, by passing a random 639 pseudo build name to `resolve `method. 640 """ 641 return cls.resolve(build=str(time.time())) 642 643 644class CrashServer(DevServer): 645 """Class of DevServer that symbolicates crash dumps.""" 646 647 @staticmethod 648 def servers(): 649 return _get_crash_server_list() 650 651 652 @remote_devserver_call() 653 def symbolicate_dump(self, minidump_path, build): 654 """Ask the devserver to symbolicate the dump at minidump_path. 
655 656 Stage the debug symbols for |build| and, if that works, ask the 657 devserver to symbolicate the dump at |minidump_path|. 658 659 @param minidump_path: the on-disk path of the minidump. 660 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 661 whose debug symbols are needed for symbolication. 662 @return The contents of the stack trace 663 @raise DevServerException upon any return code that's not HTTP OK. 664 """ 665 try: 666 import requests 667 except ImportError: 668 logging.warning("Can't 'import requests' to connect to dev server.") 669 return '' 670 server_name = self.get_server_name(self.url()) 671 server_name = server_name.replace('.', '_') 672 stats_key = 'CrashServer.%s.symbolicate_dump' % server_name 673 autotest_stats.Counter(stats_key).increment() 674 timer = autotest_stats.Timer(stats_key) 675 timer.start() 676 # Symbolicate minidump. 677 call = self.build_call('symbolicate_dump', 678 archive_url=_get_image_storage_server() + build) 679 request = requests.post( 680 call, files={'minidump': open(minidump_path, 'rb')}) 681 if request.status_code == requests.codes.OK: 682 timer.stop() 683 return request.text 684 685 error_fd = cStringIO.StringIO(request.text) 686 raise urllib2.HTTPError( 687 call, request.status_code, request.text, request.headers, 688 error_fd) 689 690 691 @classmethod 692 def get_available_devservers(cls, hostname): 693 """Get all available crash servers. 694 695 Crash server election doesn't need to count the location of hostname. 696 697 @param hostname: Hostname of a DUT to choose devserver for. 698 699 @return: A tuple of (all crash servers, False). can_retry is set to 700 False, as all crash servers are returned. There is no point to 701 retry. 702 """ 703 return cls.servers(), False 704 705 706class ImageServerBase(DevServer): 707 """Base class for devservers used to stage builds. 708 709 CrOS and Android builds are staged in different ways as they have different 710 sets of artifacts. 
This base class abstracts the shared functions between 711 the two types of ImageServer. 712 """ 713 714 @classmethod 715 def servers(cls): 716 """Returns a list of servers that can serve as a desired type of 717 devserver. 718 """ 719 return _get_dev_server_list() 720 721 722 def _get_image_url(self, image): 723 """Returns the url of the directory for this image on the devserver. 724 725 @param image: the image that was fetched. 726 """ 727 image = self.translate(image) 728 url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', 729 type=str) 730 return (url_pattern % (self.url(), image)).replace('update', 'static') 731 732 733 @staticmethod 734 def create_stats_str(subname, server_name, artifacts): 735 """Create a graphite name given the staged items. 736 737 The resulting name will look like 738 'dev_server.subname.DEVSERVER_URL.artifact1_artifact2' 739 The name can be used to create a stats object like 740 stats.Timer, stats.Counter, etc. 741 742 @param subname: A name for the graphite sub path. 743 @param server_name: name of the devserver, e.g 172.22.33.44. 744 @param artifacts: A list of artifacts. 745 746 @return A name described above. 747 748 """ 749 staged_items = sorted(artifacts) if artifacts else [] 750 staged_items_str = '_'.join(staged_items).replace( 751 '.', '_') if staged_items else None 752 server_name = server_name.replace('.', '_') 753 stats_str = 'dev_server.%s.%s' % (subname, server_name) 754 if staged_items_str: 755 stats_str += '.%s' % staged_items_str 756 return stats_str 757 758 759 @staticmethod 760 def create_metadata(server_name, image, artifacts=None, files=None): 761 """Create a metadata dictionary given the staged items. 762 763 The metadata can be send to metadata db along with stats. 764 765 @param server_name: name of the devserver, e.g 172.22.33.44. 766 @param image: The name of the image. 767 @param artifacts: A list of artifacts. 768 @param files: A list of files. 769 770 @return A metadata dictionary. 
771 772 """ 773 metadata = {'devserver': server_name, 774 'image': image, 775 '_type': 'devserver'} 776 if artifacts: 777 metadata['artifacts'] = ' '.join(artifacts) 778 if files: 779 metadata['files'] = ' '.join(files) 780 return metadata 781 782 783 @classmethod 784 def run_ssh_call(cls, call, readline=False, timeout=None): 785 """Construct an ssh-based rpc call, and execute it. 786 787 @param call: a url string that calls a method to a devserver. 788 @param readline: whether read http response line by line. 789 @param timeout: The timeout seconds for ssh call. 790 791 @return the results of this call. 792 """ 793 hostname = urlparse.urlparse(call).hostname 794 ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call)) 795 timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60 796 try: 797 result = utils.run(ssh_call, timeout=timeout_seconds) 798 except error.CmdError as e: 799 logging.debug('Error occurred with exit_code %d when executing the ' 800 'ssh call: %s.', e.result_obj.exit_status, 801 e.result_obj.stderr) 802 stats_str = 'dev_server.%s.%s' % (hostname.replace('.', '_'), 803 'ssh_dev_server_failure') 804 autotest_stats.Counter(stats_str).increment() 805 raise 806 response = result.stdout 807 808 # If the curl command's returned HTTP response contains certain 809 # exception string, raise the DevServerException of the response. 810 if 'DownloaderException' in response: 811 raise DevServerException(_strip_http_message(response)) 812 813 if readline: 814 # Remove line terminators and trailing whitespace 815 response = response.splitlines() 816 return [line.rstrip() for line in response] 817 818 return response 819 820 821 @classmethod 822 def run_call(cls, call, readline=False, timeout=None): 823 """Invoke a given devserver call using urllib.open or ssh. 824 825 Open the URL with HTTP or SSH-based HTTP, and return the text of the 826 response. Exceptions may be raised as for urllib2.urlopen() or 827 utils.run(). 
828 829 @param call: a url string that calls a method to a devserver. 830 @param readline: whether read http response line by line. 831 @param timeout: The timeout seconds for urlopen call or ssh call. 832 833 @return the results of this call. 834 """ 835 if not ENABLE_SSH_CONNECTION_FOR_DEVSERVER: 836 return super(ImageServerBase, cls).run_call( 837 call, readline=readline, timeout=timeout) 838 else: 839 return cls.run_ssh_call( 840 call, readline=readline, timeout=timeout) 841 842 843 def _poll_is_staged(self, **kwargs): 844 """Polling devserver.is_staged until all artifacts are staged. 845 846 @param kwargs: keyword arguments to make is_staged devserver call. 847 848 @return: True if all artifacts are staged in devserver. 849 """ 850 call = self.build_call('is_staged', **kwargs) 851 852 def all_staged(): 853 """Call devserver.is_staged rpc to check if all files are staged. 854 855 @return: True if all artifacts are staged in devserver. False 856 otherwise. 857 @rasies DevServerException, the exception is a wrapper of all 858 exceptions that were raised when devserver tried to download 859 the artifacts. devserver raises an HTTPError or a CmdError 860 when an exception was raised in the code. Such exception 861 should be re-raised here to stop the caller from waiting. 862 If the call to devserver failed for connection issue, a 863 URLError exception is raised, and caller should retry the 864 call to avoid such network flakiness. 865 866 """ 867 try: 868 result = self.run_call(call) 869 logging.debug('whether artifact is staged: %r', result) 870 return result == 'True' 871 except urllib2.HTTPError as e: 872 error_markup = e.read() 873 raise DevServerException(_strip_http_message(error_markup)) 874 except urllib2.URLError as e: 875 # Could be connection issue, retry it. 
876 # For example: <urlopen error [Errno 111] Connection refused> 877 logging.error('URLError happens in is_stage: %r', e) 878 return False 879 except error.CmdError as e: 880 # Retry if SSH failed to connect to the devserver. 881 logging.warning('CmdError happens in is_stage: %r, will retry', e) 882 return False 883 884 site_utils.poll_for_condition( 885 all_staged, 886 exception=site_utils.TimeoutError(), 887 timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60, 888 sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL) 889 890 return True 891 892 893 def _call_and_wait(self, call_name, error_message, 894 expected_response=SUCCESS, **kwargs): 895 """Helper method to make a urlopen call, and wait for artifacts staged. 896 897 @param call_name: name of devserver rpc call. 898 @param error_message: Error message to be thrown if response does not 899 match expected_response. 900 @param expected_response: Expected response from rpc, default to 901 |Success|. If it's set to None, do not compare 902 the actual response. Any response is consider 903 to be good. 904 @param kwargs: keyword arguments to make is_staged devserver call. 905 906 @return: The response from rpc. 907 @raise DevServerException upon any return code that's expected_response. 908 909 """ 910 call = self.build_call(call_name, async=True, **kwargs) 911 try: 912 response = self.run_call(call) 913 logging.debug('response for RPC: %r', response) 914 except httplib.BadStatusLine as e: 915 logging.error(e) 916 raise DevServerException('Received Bad Status line, Devserver %s ' 917 'might have gone down while handling ' 918 'the call: %s' % (self.url(), call)) 919 920 if expected_response and not response == expected_response: 921 raise DevServerException(error_message) 922 923 # `os_type` is needed in build a devserver call, but not needed for 924 # wait_for_artifacts_staged, since that method is implemented by 925 # each ImageServerBase child class. 
926 if 'os_type' in kwargs: 927 del kwargs['os_type'] 928 self.wait_for_artifacts_staged(**kwargs) 929 return response 930 931 932 def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs): 933 """Tell the devserver to download and stage |artifacts| from |image| 934 specified by kwargs. 935 936 This is the main call point for staging any specific artifacts for a 937 given build. To see the list of artifacts one can stage see: 938 939 ~src/platfrom/dev/artifact_info.py. 940 941 This is maintained along with the actual devserver code. 942 943 @param artifacts: A list of artifacts. 944 @param files: A list of files to stage. 945 @param archive_url: Optional parameter that has the archive_url to stage 946 this artifact from. Default is specified in autotest config + 947 image. 948 @param kwargs: keyword arguments that specify the build information, to 949 make stage devserver call. 950 951 @raise DevServerException upon any return code that's not HTTP OK. 952 """ 953 if not archive_url: 954 archive_url = _get_storage_server_for_artifacts(artifacts) + build 955 956 artifacts_arg = ','.join(artifacts) if artifacts else '' 957 files_arg = ','.join(files) if files else '' 958 error_message = ("staging %s for %s failed;" 959 "HTTP OK not accompanied by 'Success'." 
% 960 ('artifacts=%s files=%s ' % (artifacts_arg, files_arg), 961 build)) 962 963 staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' % 964 (build, artifacts, files, archive_url)) 965 logging.info('Staging artifacts on devserver %s: %s', 966 self.url(), staging_info) 967 if artifacts: 968 server_name = self.get_server_name(self.url()) 969 timer_key = self.create_stats_str( 970 'stage_artifacts', server_name, artifacts) 971 counter_key = self.create_stats_str( 972 'stage_artifacts_count', server_name, artifacts) 973 metadata = self.create_metadata(server_name, build, artifacts, 974 files) 975 autotest_stats.Counter(counter_key, metadata=metadata).increment() 976 timer = autotest_stats.Timer(timer_key, metadata=metadata) 977 timer.start() 978 try: 979 arguments = {'archive_url': archive_url, 980 'artifacts': artifacts_arg, 981 'files': files_arg} 982 if kwargs: 983 arguments.update(kwargs) 984 self.call_and_wait(call_name='stage', error_message=error_message, 985 **arguments) 986 if artifacts: 987 timer.stop() 988 logging.info('Finished staging artifacts: %s', staging_info) 989 except (site_utils.TimeoutError, error.TimeoutException): 990 logging.error('stage_artifacts timed out: %s', staging_info) 991 if artifacts: 992 timeout_key = self.create_stats_str( 993 'stage_artifacts_timeout', server_name, artifacts) 994 autotest_stats.Counter(timeout_key, 995 metadata=metadata).increment() 996 raise DevServerException( 997 'stage_artifacts timed out: %s' % staging_info) 998 999 1000 def call_and_wait(self, *args, **kwargs): 1001 """Helper method to make a urlopen call, and wait for artifacts staged. 1002 1003 This method needs to be overridden in the subclass to implement the 1004 logic to call _call_and_wait. 1005 """ 1006 raise NotImplementedError 1007 1008 1009 def _trigger_download(self, build, artifacts, files, synchronous=True, 1010 **kwargs_build_info): 1011 """Tell the devserver to download and stage image specified in 1012 kwargs_build_info. 
1013 1014 Tells the devserver to fetch |image| from the image storage server 1015 named by _get_image_storage_server(). 1016 1017 If |synchronous| is True, waits for the entire download to finish 1018 staging before returning. Otherwise only the artifacts necessary 1019 to start installing images onto DUT's will be staged before returning. 1020 A caller can then call finish_download to guarantee the rest of the 1021 artifacts have finished staging. 1022 1023 @param synchronous: if True, waits until all components of the image are 1024 staged before returning. 1025 @param kwargs_build_info: Dictionary of build information. 1026 For CrOS, it is None as build is the CrOS image name. 1027 For Android, it is {'target': target, 1028 'build_id': build_id, 1029 'branch': branch} 1030 1031 @raise DevServerException upon any return code that's not HTTP OK. 1032 1033 """ 1034 if kwargs_build_info: 1035 archive_url = None 1036 else: 1037 archive_url = _get_image_storage_server() + build 1038 error_message = ("trigger_download for %s failed;" 1039 "HTTP OK not accompanied by 'Success'." 
def _finish_download(self, build, artifacts, files, **kwargs_build_info):
    """Tell the devserver to finish staging image specified in
    kwargs_build_info.

    If trigger_download is called with synchronous=False, it will return
    before all artifacts have been staged. This method contacts the
    devserver and blocks until all staging is completed and should be
    called after a call to trigger_download.

    @param build: Name of the build; appended to the image storage server
                  url to form the archive_url.
    @param artifacts: Comma separated string of artifacts to stage.
    @param files: Files to stage, passed through to call_and_wait.
    @param kwargs_build_info: Dictionary of build information.
                              For CrOS, it is None as build is the CrOS
                              image name.
                              For Android, it is {'target': target,
                                                  'build_id': build_id,
                                                  'branch': branch}

    @raise DevServerException if staging times out, or whatever
            call_and_wait raises for a failed stage call.
    """
    archive_url = _get_image_storage_server() + build
    # The two literals are concatenated, so the separating space after
    # "failed;" has to be part of the first one.
    error_message = ("finish_download for %s failed; "
                     "HTTP OK not accompanied by 'Success'." % build)
    kwargs = {'archive_url': archive_url,
              'artifacts': artifacts,
              'files': files,
              'error_message': error_message}
    kwargs.update(kwargs_build_info)
    try:
        self.call_and_wait(call_name='stage', **kwargs)
    except (site_utils.TimeoutError, error.TimeoutException):
        logging.error('finish_download timed out for %s', build)
        # Count the timeout per devserver and per artifact set before
        # converting it into a DevServerException.
        server_name = self.get_server_name(self.url())
        artifacts_list = artifacts.split(',')
        timeout_key = self.create_stats_str(
                'finish_download_timeout', server_name, artifacts_list)
        metadata = self.create_metadata(server_name, build, artifacts_list)
        autotest_stats.Counter(timeout_key, metadata=metadata).increment()
        raise DevServerException(
                'finish_download timed out for %s.' % build)
1127 @raise DevServerException upon any return code that's not HTTP OK. 1128 """ 1129 if not build and not build_info: 1130 raise DevServerException('You must specify build information to ' 1131 'look for file %s in artifacts %s.' % 1132 (file_name, artifacts)) 1133 kwargs = {'file_name': file_name, 1134 'artifacts': artifacts} 1135 if build_info: 1136 build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info 1137 kwargs.update(build_info) 1138 # Devserver treats Android and Brillo build in the same way as they 1139 # are both retrieved from Launch Control and have similar build 1140 # artifacts. Therefore, os_type for devserver calls is `android` for 1141 # both Android and Brillo builds. 1142 kwargs['os_type'] = 'android' 1143 else: 1144 build_path = build 1145 kwargs['build'] = build 1146 call = self.build_call('locate_file', async=False, **kwargs) 1147 try: 1148 file_path = self.run_call(call) 1149 return os.path.join(self.url(), 'static', build_path, file_path) 1150 except httplib.BadStatusLine as e: 1151 logging.error(e) 1152 raise DevServerException('Received Bad Status line, Devserver %s ' 1153 'might have gone down while handling ' 1154 'the call: %s' % (self.url(), call)) 1155 1156 1157 @remote_devserver_call() 1158 def list_control_files(self, build, suite_name=''): 1159 """Ask the devserver to list all control files for |build|. 1160 1161 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 1162 whose control files the caller wants listed. 1163 @param suite_name: The name of the suite for which we require control 1164 files. 1165 @return None on failure, or a list of control file paths 1166 (e.g. server/site_tests/autoupdate/control) 1167 @raise DevServerException upon any return code that's not HTTP OK. 
@remote_devserver_call()
def list_suite_controls(self, build, suite_name=''):
    """Ask the devserver to list contents of all control files for |build|.

    @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                  whose control files' contents the caller wants returned.
    @param suite_name: The name of the suite for which we require control
                       files.
    @return the JSON-decoded devserver response, a dict of contents of all
            control files
            (e.g. {'path1': "#Copyright controls ***", ...,
                   'pathX': "#Copyright controls ***"})
    @raise DevServerException upon any return code that's not HTTP OK.
    """
    build = self.translate(build)
    call = self.build_call('list_suite_controls', build=build,
                           suite_name=suite_name)
    # run_call already returns the whole response as a string, so decode it
    # directly instead of wrapping it in a file-like object first.
    return json.loads(self.run_call(call))
def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''):
    """Wait until devserver reports the requested artifacts as staged.

    Forwards the build location and the requested artifacts/files as
    keyword arguments to _poll_is_staged, which does the actual polling of
    devserver.is_staged.

    @param archive_url: Google Storage URL for the build.
    @param artifacts: Comma separated list of artifacts to download.
    @param files: Comma separated list of files to download.
    @return: True if all artifacts are staged in devserver.
    """
    return self._poll_is_staged(archive_url=archive_url,
                                artifacts=artifacts,
                                files=files)
@remote_devserver_call()
def stage_artifacts(self, image=None, artifacts=None, files='',
                    archive_url=None):
    """Ask the devserver to download and stage |artifacts| from |image|.

    Public entry point for staging specific artifacts of a CrOS build; the
    list of artifacts that can be staged is kept next to the devserver
    code, see:

    ~src/platfrom/dev/artifact_info.py.

    @param image: the image to fetch and stage.
    @param artifacts: A list of artifacts.
    @param files: A list of files to stage.
    @param archive_url: Optional parameter that has the archive_url to stage
            this artifact from. Default is specified in autotest config +
            image.

    @raise DevServerException if neither artifacts nor files are given, or
            upon any return code that's not HTTP OK.
    """
    nothing_requested = not (artifacts or files)
    if nothing_requested:
        raise DevServerException('Must specify something to stage.')
    # Resolve a possible [builder]/LATEST name before staging.
    self._stage_artifacts(self.translate(image), artifacts, files,
                          archive_url)
@remote_devserver_call()
def setup_telemetry(self, build):
    """Ask the devserver to set up telemetry for |build|.

    The devserver will stage autotest and then extract the required files
    for telemetry.

    @param build: the build to setup telemetry for.

    @returns path on the devserver that telemetry is installed to.
    @raise DevServerException if the devserver answers with a bad status
            line.
    """
    translated_build = self.translate(build)
    call = self.build_call(
            'setup_telemetry',
            archive_url=_get_image_storage_server() + translated_build)
    try:
        return self.run_call(call)
    except httplib.BadStatusLine as bad_status:
        logging.error(bad_status)
        raise DevServerException('Received Bad Status line, Devserver %s '
                                 'might have gone down while handling '
                                 'the call: %s' % (self.url(), call))
def get_staged_file_url(self, filename, image):
    """Build the devserver url of a file staged for an image.

    @param filename: name of the staged file.
    @param image: the image the file was staged for.

    @return the image url, as given by _get_image_url, and filename joined
            with a '/'.
    """
    image_url = self._get_image_url(image)
    return '/'.join((image_url, filename))
def translate(self, build_name):
    """Translate the build name if it's in LATEST format.

    If the build name is in the format [builder]/LATEST, return the latest
    build in Google Storage otherwise return the build name as is.

    @param build_name: build_name to check. May be None or empty, e.g. when
                       a caller only supplies an archive_url; it is then
                       returned unchanged.

    @return The actual build name to use.
    """
    # re.match raises TypeError on None, so pass missing names through.
    if not build_name:
        return build_name
    # Group 1 is the board part of "<board>-<build type>/LATEST"; the match
    # is case-insensitive.
    match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
    if not match:
        return build_name
    translated_build = self.get_latest_build_in_gs(match.group(1))
    logging.debug('Translated relative build %s to %s', build_name,
                  translated_build)
    return translated_build
@remote_devserver_call()
def _kill_au_process_for_host(self, **kwargs):
    """Kill the triggered auto_update process if error happens in cros_au.

    @param kwargs: Arguments to make kill_au_proc devserver call.

    @raise DevServerException if devserver answers anything other than the
            string 'True'.
    """
    call = self.build_call('kill_au_proc', **kwargs)
    response = self.run_call(call)
    if response != 'True':
        # The literals are concatenated, so the first one has to end with a
        # space to keep "process" and "on" apart.
        raise DevServerException(
                'Failed to kill the triggered CrOS auto_update process '
                'on devserver %s, the response is %s' % (
                        self.url(), response))
@remote_devserver_call()
def _collect_au_log(self, log_dir, **kwargs):
    """Collect logs from devserver after cros-update process is finished.

    Collect the logs that recording the whole cros-update process, and
    write it to sysinfo path of a job.

    The example log file name that is stored is like:
        '1220-repair/sysinfo/CrOS_update_host_name_pid.log'

    @param log_dir: The directory to save the cros-update process log
                    retrieved from devserver. It is created if missing.
    @param kwargs: Arguments to make collect_cros_au_log devserver call.
                   Must contain 'host_name' (the DUT's hostname) and 'pid'
                   (the auto-update process id on devserver); both are
                   used to name the log file.

    @raise DevServerException if the log directory cannot be created or the
            log file cannot be written.
    """
    call = self.build_call('collect_cros_au_log', **kwargs)
    response = self.run_call(call)
    write_file = os.path.join(
            log_dir, 'CrOS_update_%s_%s.log' % (
                    kwargs['host_name'], kwargs['pid']))
    logging.debug('Saving auto-update logs into %s', write_file)
    try:
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        with open(write_file, 'w') as out_log:
            out_log.write(response)
    except Exception as e:
        # Callers treat log collection as best effort and only handle
        # DevServerException, so every ordinary failure is converted.
        # Unlike a bare except, KeyboardInterrupt and SystemExit are not
        # intercepted.
        raise DevServerException('Failed to write auto-update logs into '
                                 '%s: %s' % (write_file, e))
@remote_devserver_call()
def _trigger_auto_update(self, **kwargs):
    """Trigger auto-update by calling devserver.cros_au.

    @param kwargs: Arguments to make cros_au devserver call. Must contain
                   the key 'host_name'.

    @return: the response of the cros_au call, exactly as returned by
             run_call. It is not decoded here;
             wait_for_auto_update_finished json-decodes it and reads a
             success flag and the auto-update process id from it.
    @raise KeyError if kwargs has no 'host_name' entry.
    @raise DevServerException if the devserver answers with a bad status
            line.
    """
    # host_name is not used below; the lookup only has the effect of
    # raising KeyError before any RPC is made when the key is missing.
    host_name = kwargs['host_name']
    call = self.build_call('cros_au', async=True, **kwargs)
    try:
        response = self.run_call(call)
        logging.info(
                'Received response from devserver for cros_au call: %r',
                response)
    except httplib.BadStatusLine as e:
        logging.error(e)
        raise DevServerException('Received Bad Status line, Devserver %s '
                                 'might have gone down while handling '
                                 'the call: %s' % (self.url(), call))

    return response
1671 1672 @return: True if auto-update is finished for a given dut. False 1673 otherwise. 1674 @rasies DevServerException, the exception is a wrapper of all 1675 exceptions that were raised when devserver tried to 1676 download the artifacts. devserver raises an HTTPError or 1677 a CmdError when an exception was raised in the code. Such 1678 exception should be re-raised here to stop the caller from 1679 waiting. If the call to devserver failed for connection 1680 issue, a URLError exception is raised, and caller should 1681 retry the call to avoid such network flakiness. 1682 1683 """ 1684 try: 1685 response = json.loads(self.run_call(call)) 1686 # This is a temp fix to fit both dict and tuple returning 1687 # values. The dict check will be removed after a corresponding 1688 # devserver CL is deployed. 1689 if isinstance(response, dict): 1690 if response.get('detailed_error_msg'): 1691 raise DevServerException( 1692 response.get('detailed_error_msg')) 1693 1694 if response.get('finished'): 1695 logging.debug('CrOS auto-update is finished') 1696 return True 1697 else: 1698 logging.debug('Current CrOS auto-update status: %s', 1699 response.get('status')) 1700 return False 1701 1702 if not response[0]: 1703 logging.debug('Current CrOS auto-update status: %s', 1704 response[1]) 1705 return False 1706 else: 1707 logging.debug('CrOS auto-update is finished') 1708 return True 1709 except urllib2.HTTPError as e: 1710 error_markup = e.read() 1711 raise DevServerException(_strip_http_message(error_markup)) 1712 except urllib2.URLError as e: 1713 # Could be connection issue, retry it. 1714 # For example: <urlopen error [Errno 111] Connection refused> 1715 logging.warning('URLError (%r): Retrying connection to ' 1716 'devserver to check auto-update status.', e) 1717 return False 1718 except error.CmdError: 1719 # Retry if SSH failed to connect to the devserver. 
def wait_for_auto_update_finished(self, response, **kwargs):
    """Processing response of 'cros_au' and polling for auto-update status.

    Will wait for the whole auto-update process is finished.

    @param response: The response from RPC 'cros_au', a JSON string that
                     decodes to a sequence whose first item tells whether
                     the update was triggered and whose second item is the
                     auto-update process id.
    @param kwargs: keyword arguments to make get_au_status devserver call.

    @return: a tuple includes two elements.
              raised_error: None if everything works well or the raised
                            error.
              pid: the auto-update process id on devserver, 0 if the
                   response could not be decoded or reports that no
                   process was started.
    """
    pid = 0
    raised_error = None
    try:
        response = json.loads(response)
        if response[0]:
            pid = response[1]
            logging.debug('start process %r for auto_update in devserver',
                          pid)
            self._wait_for_auto_update_finished(pid, **kwargs)
    except Exception as e:
        logging.debug('Failed to trigger auto-update process on devserver')
        raised_error = e

    # Return after the try statement rather than from a `finally` clause: a
    # return inside `finally` would silently discard KeyboardInterrupt and
    # SystemExit, which `except Exception` intentionally lets through.
    return raised_error, pid
1778 @param log_dir: The log directory to store auto-update logs from 1779 devserver. 1780 @param force_update: Force an update even if the version installed 1781 is the same. Default: False. 1782 @param full_update: If True, do not run stateful update, directly 1783 force a full reimage. If False, try stateful 1784 update first if the dut is already installed 1785 with the same version. 1786 """ 1787 kwargs = {'host_name': host_name, 1788 'build_name': build_name, 1789 'force_update': force_update, 1790 'full_update': full_update} 1791 1792 error_msg = 'CrOS auto-update failed for host %s: %s' 1793 error_msg_attempt = 'Exception raised on auto_update attempt #%s:\n%s' 1794 is_au_success = False 1795 au_log_dir = os.path.join(log_dir, 1796 AUTO_UPDATE_LOG_DIR) if log_dir else None 1797 error_list = [] 1798 for au_attempt in range(AU_RETRY_LIMIT): 1799 logging.debug('Start CrOS auto-update for host %s at %d time(s).', 1800 host_name, au_attempt + 1) 1801 # No matter _start_auto_update succeeds or fails, the auto-update 1802 # track_status_file should be cleaned, and the auto-update execute 1803 # log should be collected to directory sysinfo. Also, the error 1804 # raised by _start_auto_update should be displayed. 1805 try: 1806 response = self._trigger_auto_update(**kwargs) 1807 except DevServerException as e: 1808 logging.debug(error_msg_attempt, au_attempt+1, str(e)) 1809 error_list.append(str(e)) 1810 else: 1811 raised_error, pid = self.wait_for_auto_update_finished(response, 1812 **kwargs) 1813 # Error happens in _clean_track_log won't be raised. Auto-update 1814 # process will be retried. 1815 is_clean_success = self.clean_track_log(host_name, pid) 1816 # Error happens in _collect_au_log won't be raised. Auto-update 1817 # process will be retried. 
1818 if au_log_dir: 1819 is_collect_success = self.collect_au_log( 1820 host_name, pid, au_log_dir) 1821 else: 1822 is_collect_success = True 1823 # If any error is raised previously, log it and retry 1824 # auto-update. Otherwise, claim a success CrOS auto-update. 1825 if not raised_error and is_clean_success and is_collect_success: 1826 logging.debug('CrOS auto-update succeed for host %s', 1827 host_name) 1828 is_au_success = True 1829 break 1830 else: 1831 if raised_error: 1832 logging.debug(error_msg_attempt, au_attempt+1, 1833 str(raised_error)) 1834 error_list.append(self._parse_AU_error(str(raised_error))) 1835 if not self.kill_au_process_for_host(host_name): 1836 logging.debug('Failed to kill auto_update process %d', 1837 pid) 1838 1839 finally: 1840 if not is_au_success and au_attempt < AU_RETRY_LIMIT - 1: 1841 time.sleep(CROS_AU_RETRY_INTERVAL) 1842 # TODO(kevcheng): Remove this once crbug.com/651974 is 1843 # fixed. 1844 # DNS is broken in the cassandra lab, so use the IP of the 1845 # hostname instead if it fails. Not rename host_name here 1846 # for error msg reporting. 1847 host_name_ip = socket.gethostbyname(host_name) 1848 kwargs['host_name'] = host_name_ip 1849 logging.debug( 1850 'AU failed, trying IP instead of hostname: %s', 1851 host_name_ip) 1852 1853 if not is_au_success: 1854 # If errors happen in the CrOS AU process, report the first error 1855 # since the following errors might be caused by the first error. 1856 # If error happens in RPCs of cleaning track log, collecting 1857 # auto-update logs, or killing auto-update processes, just report 1858 # them together. 1859 if error_list: 1860 raise DevServerException(error_msg % (host_name, error_list[0])) 1861 else: 1862 raise DevServerException(error_msg % ( 1863 host_name, ('RPC calls after the whole auto-update ' 1864 'process failed.'))) 1865 1866 1867class AndroidBuildServer(ImageServerBase): 1868 """Class for DevServer that handles RPCs related to Android builds. 
1869 1870 The calls to devserver to stage artifacts, including stage and download, are 1871 made in async mode. That is, when caller makes an RPC |stage| to request 1872 devserver to stage certain artifacts, devserver handles the call and starts 1873 staging artifacts in a new thread, and return |Success| without waiting for 1874 staging being completed. When caller receives message |Success|, it polls 1875 devserver's is_staged call until all artifacts are staged. 1876 Such mechanism is designed to prevent cherrypy threads in devserver being 1877 running out, as staging artifacts might take long time, and cherrypy starts 1878 with a fixed number of threads that handle devserver rpc. 1879 """ 1880 1881 def wait_for_artifacts_staged(self, target, build_id, branch, 1882 archive_url=None, artifacts='', files=''): 1883 """Polling devserver.is_staged until all artifacts are staged. 1884 1885 @param target: Target of the android build to stage, e.g., 1886 shamu-userdebug. 1887 @param build_id: Build id of the android build to stage. 1888 @param branch: Branch of the android build to stage. 1889 @param archive_url: Google Storage URL for the build. 1890 @param artifacts: Comma separated list of artifacts to download. 1891 @param files: Comma separated list of files to download. 1892 1893 @return: True if all artifacts are staged in devserver. 1894 """ 1895 kwargs = {'target': target, 1896 'build_id': build_id, 1897 'branch': branch, 1898 'artifacts': artifacts, 1899 'files': files, 1900 'os_type': 'android'} 1901 if archive_url: 1902 kwargs['archive_url'] = archive_url 1903 return self._poll_is_staged(**kwargs) 1904 1905 1906 @remote_devserver_call() 1907 def call_and_wait(self, call_name, target, build_id, branch, archive_url, 1908 artifacts, files, error_message, 1909 expected_response=SUCCESS): 1910 """Helper method to make a urlopen call, and wait for artifacts staged. 1911 1912 @param call_name: name of devserver rpc call. 
1913 @param target: Target of the android build to stage, e.g., 1914 shamu-userdebug. 1915 @param build_id: Build id of the android build to stage. 1916 @param branch: Branch of the android build to stage. 1917 @param archive_url: Google Storage URL for the CrOS build. 1918 @param artifacts: Comma separated list of artifacts to download. 1919 @param files: Comma separated list of files to download. 1920 @param expected_response: Expected response from rpc, default to 1921 |Success|. If it's set to None, do not compare 1922 the actual response. Any response is consider 1923 to be good. 1924 @param error_message: Error message to be thrown if response does not 1925 match expected_response. 1926 1927 @return: The response from rpc. 1928 @raise DevServerException upon any return code that's expected_response. 1929 1930 """ 1931 kwargs = {'target': target, 1932 'build_id': build_id, 1933 'branch': branch, 1934 'artifacts': artifacts, 1935 'files': files, 1936 'os_type': 'android'} 1937 if archive_url: 1938 kwargs['archive_url'] = archive_url 1939 return self._call_and_wait(call_name, error_message, expected_response, 1940 **kwargs) 1941 1942 1943 @remote_devserver_call() 1944 def stage_artifacts(self, target=None, build_id=None, branch=None, 1945 image=None, artifacts=None, files='', archive_url=None): 1946 """Tell the devserver to download and stage |artifacts| from |image|. 1947 1948 This is the main call point for staging any specific artifacts for a 1949 given build. To see the list of artifacts one can stage see: 1950 1951 ~src/platfrom/dev/artifact_info.py. 1952 1953 This is maintained along with the actual devserver code. 1954 1955 @param target: Target of the android build to stage, e.g., 1956 shamu-userdebug. 1957 @param build_id: Build id of the android build to stage. 1958 @param branch: Branch of the android build to stage. 1959 @param image: Name of a build to test, in the format of 1960 branch/target/build_id 1961 @param artifacts: A list of artifacts. 
1962 @param files: A list of files to stage. 1963 @param archive_url: Optional parameter that has the archive_url to stage 1964 this artifact from. Default is specified in autotest config + 1965 image. 1966 1967 @raise DevServerException upon any return code that's not HTTP OK. 1968 """ 1969 if image and not target and not build_id and not branch: 1970 branch, target, build_id = utils.parse_launch_control_build(image) 1971 if not target or not build_id or not branch: 1972 raise DevServerException('Must specify all build info (target, ' 1973 'build_id and branch) to stage.') 1974 1975 android_build_info = {'target': target, 1976 'build_id': build_id, 1977 'branch': branch} 1978 if not artifacts and not files: 1979 raise DevServerException('Must specify something to stage.') 1980 if not all(android_build_info.values()): 1981 raise DevServerException( 1982 'To stage an Android build, must specify target, build id ' 1983 'and branch.') 1984 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1985 self._stage_artifacts(build, artifacts, files, archive_url, 1986 **android_build_info) 1987 1988 1989 def trigger_download(self, target, build_id, branch, artifacts=None, 1990 files='', os='android', synchronous=True): 1991 """Tell the devserver to download and stage an Android build. 1992 1993 Tells the devserver to fetch an Android build from the image storage 1994 server named by _get_image_storage_server(). 1995 1996 If |synchronous| is True, waits for the entire download to finish 1997 staging before returning. Otherwise only the artifacts necessary 1998 to start installing images onto DUT's will be staged before returning. 1999 A caller can then call finish_download to guarantee the rest of the 2000 artifacts have finished staging. 2001 2002 @param target: Target of the android build to stage, e.g., 2003 shamu-userdebug. 2004 @param build_id: Build id of the android build to stage. 2005 @param branch: Branch of the android build to stage. 
2006 @param artifacts: A string of artifacts separated by comma. If None, 2007 use the default artifacts for Android or Brillo build. 2008 @param files: String of file seperated by commas. 2009 @param os: OS artifacts to download (android/brillo). 2010 @param synchronous: if True, waits until all components of the image are 2011 staged before returning. 2012 2013 @raise DevServerException upon any return code that's not HTTP OK. 2014 2015 """ 2016 android_build_info = {'target': target, 2017 'build_id': build_id, 2018 'branch': branch} 2019 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 2020 if not artifacts: 2021 board = target.split('-')[0] 2022 artifacts = ( 2023 android_utils.AndroidArtifacts.get_artifacts_for_reimage( 2024 board, os)) 2025 self._trigger_download(build, artifacts, files=files, 2026 synchronous=synchronous, **android_build_info) 2027 2028 2029 def finish_download(self, target, build_id, branch, os='android'): 2030 """Tell the devserver to finish staging an Android build. 2031 2032 If trigger_download is called with synchronous=False, it will return 2033 before all artifacts have been staged. This method contacts the 2034 devserver and blocks until all staging is completed and should be 2035 called after a call to trigger_download. 2036 2037 @param target: Target of the android build to stage, e.g., 2038 shamu-userdebug. 2039 @param build_id: Build id of the android build to stage. 2040 @param branch: Branch of the android build to stage. 2041 @param os: OS artifacts to download (android/brillo). 2042 2043 @raise DevServerException upon any return code that's not HTTP OK. 
2044 """ 2045 android_build_info = {'target': target, 2046 'build_id': build_id, 2047 'branch': branch} 2048 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 2049 board = target.split('-')[0] 2050 artifacts = ( 2051 android_utils.AndroidArtifacts.get_artifacts_for_reimage( 2052 board)) 2053 self._finish_download(build, artifacts, files='', **android_build_info) 2054 2055 2056 def get_staged_file_url(self, filename, target, build_id, branch): 2057 """Returns the url of a staged file for this image on the devserver. 2058 2059 @param filename: Name of the file. 2060 @param target: Target of the android build to stage, e.g., 2061 shamu-userdebug. 2062 @param build_id: Build id of the android build to stage. 2063 @param branch: Branch of the android build to stage. 2064 2065 @return: The url of a staged file for this image on the devserver. 2066 """ 2067 android_build_info = {'target': target, 2068 'build_id': build_id, 2069 'branch': branch, 2070 'os_type': 'android'} 2071 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 2072 return '/'.join([self._get_image_url(build), filename]) 2073 2074 2075 @remote_devserver_call() 2076 def translate(self, build_name): 2077 """Translate the build name if it's in LATEST format. 2078 2079 If the build name is in the format [branch]/[target]/LATEST, return the 2080 latest build in Launch Control otherwise return the build name as is. 2081 2082 @param build_name: build_name to check. 2083 2084 @return The actual build name to use. 
2085 """ 2086 branch, target, build_id = utils.parse_launch_control_build(build_name) 2087 if build_id.upper() != 'LATEST': 2088 return build_name 2089 call = self.build_call('latestbuild', branch=branch, target=target, 2090 os_type='android') 2091 translated_build_id = self.run_call(call) 2092 translated_build = (ANDROID_BUILD_NAME_PATTERN % 2093 {'branch': branch, 2094 'target': target, 2095 'build_id': translated_build_id}) 2096 logging.debug('Translated relative build %s to %s', build_name, 2097 translated_build) 2098 return translated_build 2099 2100 2101def _is_load_healthy(load): 2102 """Check if devserver's load meets the minimum threshold. 2103 2104 @param load: The devserver's load stats to check. 2105 2106 @return: True if the load meets the minimum threshold. Return False 2107 otherwise. 2108 2109 """ 2110 # Threshold checks, including CPU load. 2111 if load[DevServer.CPU_LOAD] > DevServer.MAX_CPU_LOAD: 2112 logging.debug('CPU load of devserver %s is at %s%%, which is higher ' 2113 'than the threshold of %s%%', load['devserver'], 2114 load[DevServer.CPU_LOAD], DevServer.MAX_CPU_LOAD) 2115 return False 2116 if load[DevServer.NETWORK_IO] > DevServer.MAX_NETWORK_IO: 2117 logging.debug('Network IO of devserver %s is at %i Bps, which is ' 2118 'higher than the threshold of %i bytes per second.', 2119 load['devserver'], load[DevServer.NETWORK_IO], 2120 DevServer.MAX_NETWORK_IO) 2121 return False 2122 return True 2123 2124 2125def _compare_load(devserver1, devserver2): 2126 """Comparator function to compare load between two devservers. 2127 2128 @param devserver1: A dictionary of devserver load stats to be compared. 2129 @param devserver2: A dictionary of devserver load stats to be compared. 2130 2131 @return: Negative value if the load of `devserver1` is less than the load 2132 of `devserver2`. Return positive value otherwise. 
2133 2134 """ 2135 return int(devserver1[DevServer.DISK_IO] - devserver2[DevServer.DISK_IO]) 2136 2137 2138def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None): 2139 """Get the devserver with the least load. 2140 2141 Iterate through all devservers and get the one with least load. 2142 2143 TODO(crbug.com/486278): Devserver with required build already staged should 2144 take higher priority. This will need check_health call to be able to verify 2145 existence of a given build/artifact. Also, in case all devservers are 2146 overloaded, the logic here should fall back to the old behavior that randomly 2147 selects a devserver based on the hash of the image name/url. 2148 2149 @param devserver_type: Type of devserver to select from. Default is set to 2150 ImageServer. 2151 @param hostname: Hostname of the dut that the devserver is used for. The 2152 picked devserver needs to respect the location of the host if 2153 `prefer_local_devserver` is set to True or `restricted_subnets` is 2154 set. 2155 2156 @return: Name of the devserver with the least load. 2157 2158 """ 2159 devservers, can_retry = devserver_type.get_available_devservers( 2160 hostname) 2161 # If no healthy devservers available and can_retry is False, return None. 2162 # Otherwise, relax the constrain on hostname, allow all devservers to be 2163 # available. 2164 if not devserver_type.get_healthy_devserver('', devservers): 2165 if not can_retry: 2166 return None 2167 else: 2168 devservers, _ = devserver_type.get_available_devservers() 2169 2170 # get_devserver_load call needs to be made in a new process to allow force 2171 # timeout using signal. 
2172 output = multiprocessing.Queue() 2173 processes = [] 2174 for devserver in devservers: 2175 processes.append(multiprocessing.Process( 2176 target=devserver_type.get_devserver_load_wrapper, 2177 args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output))) 2178 2179 for p in processes: 2180 p.start() 2181 for p in processes: 2182 p.join() 2183 loads = [output.get() for p in processes] 2184 # Filter out any load failed to be retrieved or does not support load check. 2185 loads = [load for load in loads if load and DevServer.CPU_LOAD in load and 2186 DevServer.is_free_disk_ok(load) and 2187 DevServer.is_apache_client_count_ok(load)] 2188 if not loads: 2189 logging.debug('Failed to retrieve load stats from any devserver. No ' 2190 'load balancing can be applied.') 2191 return None 2192 loads = [load for load in loads if _is_load_healthy(load)] 2193 if not loads: 2194 logging.error('No devserver has the capacity to be selected.') 2195 return None 2196 loads = sorted(loads, cmp=_compare_load) 2197 return loads[0]['devserver'] 2198 2199 2200def resolve(build, hostname=None): 2201 """Resolve a devserver can be used for given build and hostname. 2202 2203 @param build: Name of a build to stage on devserver, e.g., 2204 ChromeOS build: daisy-release/R50-1234.0.0 2205 Launch Control build: git_mnc_release/shamu-eng 2206 @param hostname: Hostname of a devserver for, default is None, which means 2207 devserver is not restricted by the network location of the host. 2208 2209 @return: A DevServer instance that can be used to stage given build for the 2210 given host. 2211 """ 2212 if utils.is_launch_control_build(build): 2213 return AndroidBuildServer.resolve(build, hostname) 2214 else: 2215 return ImageServer.resolve(build, hostname) 2216