dev_server.py revision db0366c921c4067beb88f94a1b9df7ddf9265a37
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5from distutils import version 6import cStringIO 7import HTMLParser 8import httplib 9import json 10import logging 11import multiprocessing 12import os 13import re 14import sys 15import urllib2 16 17from autotest_lib.client.bin import utils as site_utils 18from autotest_lib.client.common_lib import error 19from autotest_lib.client.common_lib import global_config 20from autotest_lib.client.common_lib import utils 21from autotest_lib.client.common_lib.cros import retry 22from autotest_lib.client.common_lib.cros.graphite import autotest_stats 23# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107 24 25 26CONFIG = global_config.global_config 27# This file is generated at build time and specifies, per suite and per test, 28# the DEPENDENCIES list specified in each control file. It's a dict of dicts: 29# {'bvt': {'/path/to/autotest/control/site_tests/test1/control': ['dep1']} 30# 'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']} 31# 'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'], 32# '/path/to/autotest/control/site_tests/test3/control': ['dep3']} 33# } 34DEPENDENCIES_FILE = 'test_suites/dependency_info' 35# Number of seconds for caller to poll devserver's is_staged call to check if 36# artifacts are staged. 37_ARTIFACT_STAGE_POLLING_INTERVAL = 5 38# Artifacts that should be staged when client calls devserver RPC to stage an 39# image. 40_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful' 41# Artifacts that should be staged when client calls devserver RPC to stage an 42# image with autotest artifact. 43_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,' 44 'control_files,stateful,' 45 'autotest_packages') 46# Artifacts that should be staged when client calls devserver RPC to stage an 47# Android build. 48_ANDROID_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = ('bootloader_image,radio_image,' 49 'zip_images,test_zip') 50# Artifacts that should be staged when client calls devserver RPC to stage an 51# Android build. 52_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = ('zip_images,vendor_partitions') 53SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value( 54 'CROS', 'skip_devserver_health_check', type=bool) 55# Number of seconds for the call to get devserver load to time out. 56TIMEOUT_GET_DEVSERVER_LOAD = 2.0 57 58# Android artifact path in devserver 59ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value( 60 'CROS', 'android_build_name_pattern', type=str).replace('\\', '') 61 62# Return value from a devserver RPC indicating the call succeeded. 63SUCCESS = 'Success' 64 65PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value( 66 'CROS', 'prefer_local_devserver', type=bool, default=False) 67 68ENABLE_DEVSERVER_IN_RESTRICTED_SUBNET = CONFIG.get_config_value( 69 'CROS', 'enable_devserver_in_restricted_subnet', type=bool, 70 default=False) 71 72class MarkupStripper(HTMLParser.HTMLParser): 73 """HTML parser that strips HTML tags, coded characters like & 74 75 Works by, basically, not doing anything for any tags, and only recording 76 the content of text nodes in an internal data structure. 77 """ 78 def __init__(self): 79 self.reset() 80 self.fed = [] 81 82 83 def handle_data(self, d): 84 """Consume content of text nodes, store it away.""" 85 self.fed.append(d) 86 87 88 def get_data(self): 89 """Concatenate and return all stored data.""" 90 return ''.join(self.fed) 91 92 93def _get_image_storage_server(): 94 return CONFIG.get_config_value('CROS', 'image_storage_server', type=str) 95 96 97def _get_canary_channel_server(): 98 """ 99 Get the url of the canary-channel server, 100 eg: gsutil://chromeos-releases/canary-channel/<board>/<release> 101 102 @return: The url to the canary channel server. 103 """ 104 return CONFIG.get_config_value('CROS', 'canary_channel_server', type=str) 105 106 107def _get_storage_server_for_artifacts(artifacts=None): 108 """Gets the appropriate storage server for the given artifacts. 109 110 @param artifacts: A list of artifacts we need to stage. 111 @return: The address of the storage server that has these artifacts. 112 The default image storage server if no artifacts are specified. 113 """ 114 factory_artifact = global_config.global_config.get_config_value( 115 'CROS', 'factory_artifact', type=str, default='') 116 if artifacts and factory_artifact and factory_artifact in artifacts: 117 return _get_canary_channel_server() 118 return _get_image_storage_server() 119 120 121def _get_dev_server_list(): 122 return CONFIG.get_config_value('CROS', 'dev_server', type=list, default=[]) 123 124 125def _get_crash_server_list(): 126 return CONFIG.get_config_value('CROS', 'crash_server', type=list, 127 default=[]) 128 129 130def remote_devserver_call(timeout_min=30): 131 """A decorator to use with remote devserver calls. 132 133 This decorator converts urllib2.HTTPErrors into DevServerExceptions with 134 any embedded error info converted into plain text. 135 The method retries on urllib2.URLError to avoid devserver flakiness. 136 """ 137 #pylint: disable=C0111 138 def inner_decorator(method): 139 140 @retry.retry(urllib2.URLError, timeout_min=timeout_min) 141 def wrapper(*args, **kwargs): 142 """This wrapper actually catches the HTTPError.""" 143 try: 144 return method(*args, **kwargs) 145 except urllib2.HTTPError as e: 146 error_markup = e.read() 147 strip = MarkupStripper() 148 try: 149 strip.feed(error_markup.decode('utf_32')) 150 except UnicodeDecodeError: 151 strip.feed(error_markup) 152 raise DevServerException(strip.get_data()) 153 154 return wrapper 155 156 return inner_decorator 157 158 159class DevServerException(Exception): 160 """Raised when the dev server returns a non-200 HTTP response.""" 161 pass 162 163 164class DevServer(object): 165 """Base class for all DevServer-like server stubs. 166 167 This is the base class for interacting with all Dev Server-like servers. 168 A caller should instantiate a sub-class of DevServer with: 169 170 host = SubClassServer.resolve(build) 171 server = SubClassServer(host) 172 """ 173 _MIN_FREE_DISK_SPACE_GB = 20 174 _MAX_APACHE_CLIENT_COUNT = 75 175 # Threshold for the CPU load percentage for a devserver to be selected. 176 MAX_CPU_LOAD = 80.0 177 # Threshold for the network IO, set to 80MB/s 178 MAX_NETWORK_IO = 1024 * 1024 * 80 179 DISK_IO = 'disk_total_bytes_per_second' 180 NETWORK_IO = 'network_total_bytes_per_second' 181 CPU_LOAD = 'cpu_percent' 182 FREE_DISK = 'free_disk' 183 STAGING_THREAD_COUNT = 'staging_thread_count' 184 APACHE_CLIENT_COUNT = 'apache_client_count' 185 186 187 def __init__(self, devserver): 188 self._devserver = devserver 189 190 191 def url(self): 192 """Returns the url for this devserver.""" 193 return self._devserver 194 195 196 @staticmethod 197 def get_server_name(url): 198 """Strip the http:// prefix and port from a url. 199 200 @param url: A url of a server. 201 202 @return the server name without http:// prefix and port. 203 204 """ 205 return re.sub(r':\d+.*', '', url.lstrip('http://')) 206 207 208 @staticmethod 209 def get_server_url(url): 210 """Get the devserver url from a repo url, which includes build info. 211 212 @param url: A job repo url. 213 214 @return A devserver url, e.g., http://127.0.0.10:8080 215 """ 216 match = re.match(r'(http://.*:\d+).*', url) 217 if match: 218 return match.group(1) 219 220 221 @staticmethod 222 def get_devserver_load_wrapper(devserver, timeout_sec, output): 223 """A wrapper function to call get_devserver_load in parallel. 224 225 @param devserver: url of the devserver. 226 @param timeout_sec: Number of seconds before time out the devserver 227 call. 228 @param output: An output queue to save results to. 229 """ 230 load = DevServer.get_devserver_load(devserver, 231 timeout_min=timeout_sec/60.0) 232 if load: 233 load['devserver'] = devserver 234 output.put(load) 235 236 237 @staticmethod 238 def get_devserver_load(devserver, timeout_min=0.1): 239 """Returns True if the |devserver| is healthy to stage build. 240 241 @param devserver: url of the devserver. 242 @param timeout_min: How long to wait in minutes before deciding the 243 the devserver is not up (float). 244 245 @return: A dictionary of the devserver's load. 246 247 """ 248 server_name = DevServer.get_server_name(devserver) 249 # statsd treats |.| as path separator. 250 server_name = server_name.replace('.', '_') 251 call = DevServer._build_call(devserver, 'check_health') 252 253 @remote_devserver_call(timeout_min=timeout_min) 254 def make_call(): 255 """Inner method that makes the call.""" 256 return utils.urlopen_socket_timeout( 257 call, timeout=timeout_min * 60).read() 258 259 try: 260 result_dict = json.load(cStringIO.StringIO(make_call())) 261 for key, val in result_dict.iteritems(): 262 try: 263 autotest_stats.Gauge(server_name).send(key, float(val)) 264 except ValueError: 265 # Ignore all non-numerical health data. 266 pass 267 268 return result_dict 269 except Exception as e: 270 logging.error('Devserver call failed: "%s", timeout: %s seconds,' 271 ' Error: %s', call, timeout_min * 60, e) 272 273 274 @staticmethod 275 def is_free_disk_ok(load): 276 """Check if a devserver has enough free disk. 277 278 @param load: A dict of the load of the devserver. 279 280 @return: True if the devserver has enough free disk or disk check is 281 skipped in global config. 282 283 """ 284 if SKIP_DEVSERVER_HEALTH_CHECK: 285 logging.debug('devserver health check is skipped.') 286 elif load[DevServer.FREE_DISK] < DevServer._MIN_FREE_DISK_SPACE_GB: 287 return False 288 289 return True 290 291 292 @staticmethod 293 def is_apache_client_count_ok(load): 294 """Check if a devserver has enough Apache connections available. 295 296 Apache server by default has maximum of 150 concurrent connections. If 297 a devserver has too many live connections, it likely indicates the 298 server is busy handling many long running download requests, e.g., 299 downloading stateful partitions. It is better not to add more requests 300 to it. 301 302 @param load: A dict of the load of the devserver. 303 304 @return: True if the devserver has enough Apache connections available, 305 or disk check is skipped in global config. 306 307 """ 308 if SKIP_DEVSERVER_HEALTH_CHECK: 309 logging.debug('devserver health check is skipped.') 310 elif DevServer.APACHE_CLIENT_COUNT not in load: 311 logging.debug('Apache client count is not collected from devserver.') 312 elif (load[DevServer.APACHE_CLIENT_COUNT] > 313 DevServer._MAX_APACHE_CLIENT_COUNT): 314 return False 315 316 return True 317 318 319 @staticmethod 320 def devserver_healthy(devserver, timeout_min=0.1): 321 """Returns True if the |devserver| is healthy to stage build. 322 323 @param devserver: url of the devserver. 324 @param timeout_min: How long to wait in minutes before deciding the 325 the devserver is not up (float). 326 327 @return: True if devserver is healthy. Return False otherwise. 328 329 """ 330 server_name = DevServer.get_server_name(devserver) 331 # statsd treats |.| as path separator. 332 server_name = server_name.replace('.', '_') 333 load = DevServer.get_devserver_load(devserver, timeout_min=timeout_min) 334 if not load: 335 # Failed to get the load of devserver. 336 autotest_stats.Counter(server_name + 337 '.devserver_not_healthy').increment() 338 return False 339 340 apache_ok = DevServer.is_apache_client_count_ok(load) 341 if not apache_ok: 342 logging.error('Devserver check_health failed. Live Apache client ' 343 'count is too high: %d.', 344 load[DevServer.APACHE_CLIENT_COUNT]) 345 autotest_stats.Counter(server_name + 346 '.devserver_not_healthy').increment() 347 return False 348 349 disk_ok = DevServer.is_free_disk_ok(load) 350 if not disk_ok: 351 logging.error('Devserver check_health failed. Free disk space is ' 352 'low. Only %dGB is available.', 353 load[DevServer.FREE_DISK]) 354 counter = '.devserver_healthy' if disk_ok else '.devserver_not_healthy' 355 # This counter indicates the load of a devserver. By comparing the 356 # value of this counter for all devservers, we can evaluate the 357 # load balancing across all devservers. 358 autotest_stats.Counter(server_name + counter).increment() 359 return disk_ok 360 361 362 @staticmethod 363 def _build_call(host, method, **kwargs): 364 """Build a URL to |host| that calls |method|, passing |kwargs|. 365 366 Builds a URL that calls |method| on the dev server defined by |host|, 367 passing a set of key/value pairs built from the dict |kwargs|. 368 369 @param host: a string that is the host basename e.g. http://server:90. 370 @param method: the dev server method to call. 371 @param kwargs: a dict mapping arg names to arg values. 372 @return the URL string. 373 """ 374 argstr = '&'.join(map(lambda x: "%s=%s" % x, kwargs.iteritems())) 375 return "%(host)s/%(method)s?%(argstr)s" % dict( 376 host=host, method=method, argstr=argstr) 377 378 379 def build_call(self, method, **kwargs): 380 """Builds a devserver RPC string that can be invoked using urllib.open. 381 382 @param method: remote devserver method to call. 383 """ 384 return self._build_call(self._devserver, method, **kwargs) 385 386 387 @classmethod 388 def build_all_calls(cls, method, **kwargs): 389 """Builds a list of URLs that makes RPC calls on all devservers. 390 391 Build a URL that calls |method| on the dev server, passing a set 392 of key/value pairs built from the dict |kwargs|. 393 394 @param method: the dev server method to call. 395 @param kwargs: a dict mapping arg names to arg values 396 @return the URL string 397 """ 398 calls = [] 399 # Note we use cls.servers as servers is class specific. 400 for server in cls.servers(): 401 if cls.devserver_healthy(server): 402 calls.append(cls._build_call(server, method, **kwargs)) 403 404 return calls 405 406 407 @staticmethod 408 def servers(): 409 """Returns a list of servers that can serve as this type of server.""" 410 raise NotImplementedError() 411 412 413 @classmethod 414 def get_devservers_in_same_subnet(cls, ip, mask_bits=19): 415 """Get the devservers in the same subnet of the given ip. 416 417 @param ip: The IP address of a dut to look for devserver. 418 @param mask_bits: Number of mask bits. Default is 19. 419 420 @return: A list of devservers in the same subnet of the given ip. 421 422 """ 423 # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so 424 # we need a dict to return the full devserver path once the IPs are 425 # filtered in get_servers_in_same_subnet. 426 server_names = {} 427 all_devservers = [] 428 for server in cls.servers(): 429 server_name = ImageServer.get_server_name(server) 430 server_names[server_name] = server 431 all_devservers.append(server_name) 432 devservers = utils.get_servers_in_same_subnet(ip, mask_bits, 433 all_devservers) 434 return [server_names[s] for s in devservers] 435 436 437 @classmethod 438 def get_unrestricted_devservers( 439 cls, restricted_subnet=utils.RESTRICTED_SUBNETS): 440 """Get the devservers not in any restricted subnet specified in 441 restricted_subnet. 442 443 @param restricted_subnet: A list of restriected subnets. 444 445 @return: A list of devservers not in any restricted subnet. 446 447 """ 448 devservers = [] 449 for server in cls.servers(): 450 server_name = ImageServer.get_server_name(server) 451 if not utils.get_restricted_subnet(server_name, restricted_subnet): 452 devservers.append(server) 453 return devservers 454 455 456 @classmethod 457 def get_healthy_devserver(cls, build, devservers): 458 """"Get a healthy devserver instance from the list of devservers. 459 460 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 461 462 @return: A DevServer object of a healthy devserver. Return None if no 463 healthy devserver is found. 464 465 """ 466 while devservers: 467 hash_index = hash(build) % len(devservers) 468 devserver = devservers.pop(hash_index) 469 if cls.devserver_healthy(devserver): 470 return cls(devserver) 471 472 473 @classmethod 474 def resolve(cls, build, hostname=None): 475 """"Resolves a build to a devserver instance. 476 477 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 478 @param hostname: The hostname of dut that requests a devserver. It's 479 used to make sure a devserver in the same subnet is 480 preferred. 481 482 @raise DevServerException: If no devserver is available. 483 """ 484 host_ip = None 485 if hostname: 486 host_ip = site_utils.get_ip_address(hostname) 487 if not host_ip: 488 logging.error('Failed to get IP address of %s. Will pick a ' 489 'devserver without subnet constraint.', hostname) 490 491 devservers = cls.servers() 492 493 # Go through all restricted subnet settings and check if the DUT is 494 # inside a restricted subnet. If so, get the subnet setting. 495 restricted_subnet = None 496 if host_ip and ENABLE_DEVSERVER_IN_RESTRICTED_SUBNET: 497 for subnet_ip, mask_bits in utils.RESTRICTED_SUBNETS: 498 if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits): 499 restricted_subnet = subnet_ip 500 logging.debug('The host %s (%s) is in a restricted subnet. ' 501 'Try to locate a devserver inside subnet ' 502 '%s:%d.', hostname, host_ip, subnet_ip, 503 mask_bits) 504 devservers = cls.get_devservers_in_same_subnet( 505 subnet_ip, mask_bits) 506 break 507 # If devserver election is not restricted and 508 # enable_devserver_in_restricted_subnet in global config is set to True, 509 # select a devserver from unrestricted servers. Otherwise, drone will 510 # not be able to access devserver in restricted subnet. 511 can_retry = False 512 if (not restricted_subnet and utils.RESTRICTED_SUBNETS and 513 ENABLE_DEVSERVER_IN_RESTRICTED_SUBNET): 514 devservers = cls.get_unrestricted_devservers() 515 if PREFER_LOCAL_DEVSERVER and host_ip: 516 can_retry = True 517 devservers = cls.get_devserver_in_same_subnet( 518 host_ip, cls.get_unrestricted_devservers() ) 519 devserver = cls.get_healthy_devserver(build, devservers) 520 521 if not devserver and can_retry: 522 devserver = cls.get_healthy_devserver( 523 build, cls.get_unrestricted_devservers()) 524 if devserver: 525 return devserver 526 else: 527 if restricted_subnet: 528 subnet_error = ('in the same subnet as the host %s (%s)' % 529 (hostname, host_ip)) 530 else: 531 subnet_error = '' 532 error_msg = 'All devservers %s are currently down!!!' % subnet_error 533 logging.error(error_msg) 534 raise DevServerException(error_msg) 535 536 537class CrashServer(DevServer): 538 """Class of DevServer that symbolicates crash dumps.""" 539 @staticmethod 540 def servers(): 541 return _get_crash_server_list() 542 543 544 @remote_devserver_call() 545 def symbolicate_dump(self, minidump_path, build): 546 """Ask the devserver to symbolicate the dump at minidump_path. 547 548 Stage the debug symbols for |build| and, if that works, ask the 549 devserver to symbolicate the dump at |minidump_path|. 550 551 @param minidump_path: the on-disk path of the minidump. 552 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 553 whose debug symbols are needed for symbolication. 554 @return The contents of the stack trace 555 @raise DevServerException upon any return code that's not HTTP OK. 556 """ 557 try: 558 import requests 559 except ImportError: 560 logging.warning("Can't 'import requests' to connect to dev server.") 561 return '' 562 server_name = self.get_server_name(self.url()) 563 server_name = server_name.replace('.', '_') 564 stats_key = 'CrashServer.%s.symbolicate_dump' % server_name 565 autotest_stats.Counter(stats_key).increment() 566 timer = autotest_stats.Timer(stats_key) 567 timer.start() 568 # Symbolicate minidump. 569 call = self.build_call('symbolicate_dump', 570 archive_url=_get_image_storage_server() + build) 571 request = requests.post( 572 call, files={'minidump': open(minidump_path, 'rb')}) 573 if request.status_code == requests.codes.OK: 574 timer.stop() 575 return request.text 576 577 error_fd = cStringIO.StringIO(request.text) 578 raise urllib2.HTTPError( 579 call, request.status_code, request.text, request.headers, 580 error_fd) 581 582 583class ImageServerBase(DevServer): 584 """Base class for devservers used to stage builds. 585 586 CrOS and Android builds are staged in different ways as they have different 587 sets of artifacts. This base class abstracts the shared functions between 588 the two types of ImageServer. 589 """ 590 591 @classmethod 592 def servers(cls): 593 """Returns a list of servers that can serve as a desired type of 594 devserver. 595 """ 596 return _get_dev_server_list() 597 598 599 def _get_image_url(self, image): 600 """Returns the url of the directory for this image on the devserver. 601 602 @param image: the image that was fetched. 603 """ 604 image = self.translate(image) 605 url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', 606 type=str) 607 return (url_pattern % (self.url(), image)).replace('update', 'static') 608 609 610 @staticmethod 611 def create_stats_str(subname, server_name, artifacts): 612 """Create a graphite name given the staged items. 613 614 The resulting name will look like 615 'dev_server.subname.DEVSERVER_URL.artifact1_artifact2' 616 The name can be used to create a stats object like 617 stats.Timer, stats.Counter, etc. 618 619 @param subname: A name for the graphite sub path. 620 @param server_name: name of the devserver, e.g 172.22.33.44. 621 @param artifacts: A list of artifacts. 622 623 @return A name described above. 624 625 """ 626 staged_items = sorted(artifacts) if artifacts else [] 627 staged_items_str = '_'.join(staged_items).replace( 628 '.', '_') if staged_items else None 629 server_name = server_name.replace('.', '_') 630 stats_str = 'dev_server.%s.%s' % (subname, server_name) 631 if staged_items_str: 632 stats_str += '.%s' % staged_items_str 633 return stats_str 634 635 636 @staticmethod 637 def create_metadata(server_name, image, artifacts=None, files=None): 638 """Create a metadata dictionary given the staged items. 639 640 The metadata can be send to metadata db along with stats. 641 642 @param server_name: name of the devserver, e.g 172.22.33.44. 643 @param image: The name of the image. 644 @param artifacts: A list of artifacts. 645 @param files: A list of files. 646 647 @return A metadata dictionary. 648 649 """ 650 metadata = {'devserver': server_name, 651 'image': image, 652 '_type': 'devserver'} 653 if artifacts: 654 metadata['artifacts'] = ' '.join(artifacts) 655 if files: 656 metadata['files'] = ' '.join(files) 657 return metadata 658 659 660 def _poll_is_staged(self, **kwargs): 661 """Polling devserver.is_staged until all artifacts are staged. 662 663 @param kwargs: keyword arguments to make is_staged devserver call. 664 665 @return: True if all artifacts are staged in devserver. 666 """ 667 call = self.build_call('is_staged', **kwargs) 668 669 def all_staged(): 670 """Call devserver.is_staged rpc to check if all files are staged. 671 672 @return: True if all artifacts are staged in devserver. False 673 otherwise. 674 @rasies DevServerException, the exception is a wrapper of all 675 exceptions that were raised when devserver tried to download 676 the artifacts. devserver raises an HTTPError when an 677 exception was raised in the code. Such exception should be 678 re-raised here to stop the caller from waiting. If the call 679 to devserver failed for connection issue, a URLError 680 exception is raised, and caller should retry the call to 681 avoid such network flakiness. 682 683 """ 684 try: 685 return urllib2.urlopen(call).read() == 'True' 686 except urllib2.HTTPError as e: 687 error_markup = e.read() 688 strip = MarkupStripper() 689 try: 690 strip.feed(error_markup.decode('utf_32')) 691 except UnicodeDecodeError: 692 strip.feed(error_markup) 693 raise DevServerException(strip.get_data()) 694 except urllib2.URLError as e: 695 # Could be connection issue, retry it. 696 # For example: <urlopen error [Errno 111] Connection refused> 697 return False 698 699 site_utils.poll_for_condition( 700 all_staged, 701 exception=site_utils.TimeoutError(), 702 timeout=sys.maxint, 703 sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL) 704 705 return True 706 707 708 def _call_and_wait(self, call_name, error_message, 709 expected_response=SUCCESS, **kwargs): 710 """Helper method to make a urlopen call, and wait for artifacts staged. 711 712 @param call_name: name of devserver rpc call. 713 @param error_message: Error message to be thrown if response does not 714 match expected_response. 715 @param expected_response: Expected response from rpc, default to 716 |Success|. If it's set to None, do not compare 717 the actual response. Any response is consider 718 to be good. 719 @param kwargs: keyword arguments to make is_staged devserver call. 720 721 @return: The response from rpc. 722 @raise DevServerException upon any return code that's expected_response. 723 724 """ 725 call = self.build_call(call_name, async=True, **kwargs) 726 try: 727 response = urllib2.urlopen(call).read() 728 except httplib.BadStatusLine as e: 729 logging.error(e) 730 raise DevServerException('Received Bad Status line, Devserver %s ' 731 'might have gone down while handling ' 732 'the call: %s' % (self.url(), call)) 733 734 if expected_response and not response == expected_response: 735 raise DevServerException(error_message) 736 737 # `os_type` is needed in build a devserver call, but not needed for 738 # wait_for_artifacts_staged, since that method is implemented by 739 # each ImageServerBase child class. 740 if 'os_type' in kwargs: 741 del kwargs['os_type'] 742 self.wait_for_artifacts_staged(**kwargs) 743 return response 744 745 746 def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs): 747 """Tell the devserver to download and stage |artifacts| from |image| 748 specified by kwargs. 749 750 This is the main call point for staging any specific artifacts for a 751 given build. To see the list of artifacts one can stage see: 752 753 ~src/platfrom/dev/artifact_info.py. 754 755 This is maintained along with the actual devserver code. 756 757 @param artifacts: A list of artifacts. 758 @param files: A list of files to stage. 759 @param archive_url: Optional parameter that has the archive_url to stage 760 this artifact from. Default is specified in autotest config + 761 image. 762 @param kwargs: keyword arguments that specify the build information, to 763 make stage devserver call. 764 765 @raise DevServerException upon any return code that's not HTTP OK. 766 """ 767 if not archive_url: 768 archive_url = _get_storage_server_for_artifacts(artifacts) + build 769 770 artifacts_arg = ','.join(artifacts) if artifacts else '' 771 files_arg = ','.join(files) if files else '' 772 error_message = ("staging %s for %s failed;" 773 "HTTP OK not accompanied by 'Success'." % 774 ('artifacts=%s files=%s ' % (artifacts_arg, files_arg), 775 build)) 776 777 staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' % 778 (build, artifacts, files, archive_url)) 779 logging.info('Staging artifacts on devserver %s: %s', 780 self.url(), staging_info) 781 if artifacts: 782 server_name = self.get_server_name(self.url()) 783 timer_key = self.create_stats_str( 784 'stage_artifacts', server_name, artifacts) 785 counter_key = self.create_stats_str( 786 'stage_artifacts_count', server_name, artifacts) 787 metadata = self.create_metadata(server_name, build, artifacts, 788 files) 789 autotest_stats.Counter(counter_key, metadata=metadata).increment() 790 timer = autotest_stats.Timer(timer_key, metadata=metadata) 791 timer.start() 792 try: 793 arguments = {'archive_url': archive_url, 794 'artifacts': artifacts_arg, 795 'files': files_arg} 796 if kwargs: 797 arguments.update(kwargs) 798 self.call_and_wait(call_name='stage',error_message=error_message, 799 **arguments) 800 if artifacts: 801 timer.stop() 802 logging.info('Finished staging artifacts: %s', staging_info) 803 except error.TimeoutException: 804 logging.error('stage_artifacts timed out: %s', staging_info) 805 if artifacts: 806 timeout_key = self.create_stats_str( 807 'stage_artifacts_timeout', server_name, artifacts) 808 autotest_stats.Counter(timeout_key, 809 metadata=metadata).increment() 810 raise DevServerException( 811 'stage_artifacts timed out: %s' % staging_info) 812 813 814 def call_and_wait(self, *args, **kwargs): 815 """Helper method to make a urlopen call, and wait for artifacts staged. 816 817 This method needs to be overridden in the subclass to implement the 818 logic to call _call_and_wait. 819 """ 820 raise NotImplementedError 821 822 823 def _trigger_download(self, build, artifacts, files, synchronous=True, 824 **kwargs_build_info): 825 """Tell the devserver to download and stage image specified in 826 kwargs_build_info. 827 828 Tells the devserver to fetch |image| from the image storage server 829 named by _get_image_storage_server(). 830 831 If |synchronous| is True, waits for the entire download to finish 832 staging before returning. Otherwise only the artifacts necessary 833 to start installing images onto DUT's will be staged before returning. 834 A caller can then call finish_download to guarantee the rest of the 835 artifacts have finished staging. 836 837 @param synchronous: if True, waits until all components of the image are 838 staged before returning. 839 @param kwargs_build_info: Dictionary of build information. 840 For CrOS, it is None as build is the CrOS image name. 841 For Android, it is {'target': target, 842 'build_id': build_id, 843 'branch': branch} 844 845 @raise DevServerException upon any return code that's not HTTP OK. 846 847 """ 848 if kwargs_build_info: 849 archive_url = None 850 else: 851 archive_url = _get_image_storage_server() + build 852 error_message = ("trigger_download for %s failed;" 853 "HTTP OK not accompanied by 'Success'." % build) 854 kwargs = {'archive_url': archive_url, 855 'artifacts': artifacts, 856 'files': files, 857 'error_message': error_message} 858 if kwargs_build_info: 859 kwargs.update(kwargs_build_info) 860 861 logging.info('trigger_download starts for %s', build) 862 server_name = self.get_server_name(self.url()) 863 artifacts_list = artifacts.split(',') 864 counter_key = self.create_stats_str( 865 'trigger_download_count', server_name, artifacts_list) 866 metadata = self.create_metadata(server_name, build, artifacts_list) 867 autotest_stats.Counter(counter_key, metadata=metadata).increment() 868 try: 869 response = self.call_and_wait(call_name='stage', **kwargs) 870 logging.info('trigger_download finishes for %s', build) 871 except error.TimeoutException: 872 logging.error('trigger_download timed out for %s.', build) 873 timeout_key = self.create_stats_str( 874 'trigger_download_timeout', server_name, artifacts_list) 875 autotest_stats.Counter(timeout_key, metadata=metadata).increment() 876 raise DevServerException( 877 'trigger_download timed out for %s.' % build) 878 was_successful = response == SUCCESS 879 if was_successful and synchronous: 880 self._finish_download(build, artifacts, files, **kwargs_build_info) 881 882 883 def _finish_download(self, build, artifacts, files, **kwargs_build_info): 884 """Tell the devserver to finish staging image specified in 885 kwargs_build_info. 886 887 If trigger_download is called with synchronous=False, it will return 888 before all artifacts have been staged. This method contacts the 889 devserver and blocks until all staging is completed and should be 890 called after a call to trigger_download. 891 892 @param kwargs_build_info: Dictionary of build information. 893 For CrOS, it is None as build is the CrOS image name. 894 For Android, it is {'target': target, 895 'build_id': build_id, 896 'branch': branch} 897 898 @raise DevServerException upon any return code that's not HTTP OK. 899 """ 900 archive_url = _get_image_storage_server() + build 901 error_message = ("finish_download for %s failed;" 902 "HTTP OK not accompanied by 'Success'." % build) 903 kwargs = {'archive_url': archive_url, 904 'artifacts': artifacts, 905 'files': files, 906 'error_message': error_message} 907 if kwargs_build_info: 908 kwargs.update(kwargs_build_info) 909 try: 910 self.call_and_wait(call_name='stage', **kwargs) 911 except error.TimeoutException: 912 logging.error('finish_download timed out for %s', build) 913 server_name = self.get_server_name(self.url()) 914 artifacts_list = artifacts.split(',') 915 timeout_key = self.create_stats_str( 916 'finish_download_timeout', server_name, artifacts_list) 917 metadata = self.create_metadata(server_name, build, artifacts_list) 918 autotest_stats.Counter(timeout_key, metadata=metadata).increment() 919 raise DevServerException( 920 'finish_download timed out for %s.' % build) 921 922 923 def locate_file(self, file_name, artifacts, build, build_info): 924 """Locate a file with the given file_name on devserver. 925 926 This method calls devserver RPC `locate_file` to look up a file with 927 the given file name inside specified build artifacts. 928 929 @param file_name: Name of the file to look for a file. 930 @param artifacts: A list of artifact names to search for the file. 931 @param build: Name of the build. For Android, it's None as build_info 932 should be used. 933 @param build_info: Dictionary of build information. 934 For CrOS, it is None as build is the CrOS image name. 935 For Android, it is {'target': target, 936 'build_id': build_id, 937 'branch': branch} 938 939 @return: A devserver url to the file. 940 @raise DevServerException upon any return code that's not HTTP OK. 941 """ 942 if not build and not build_info: 943 raise DevServerException('You must specify build information to ' 944 'look for file %s in artifacts %s.' % 945 (file_name, artifacts)) 946 kwargs = {'file_name': file_name, 947 'artifacts': artifacts} 948 if build_info: 949 build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info 950 kwargs.update(build_info) 951 # Devserver treats Android and Brillo build in the same way as they 952 # are both retrieved from Launch Control and have similar build 953 # artifacts. Therefore, os_type for devserver calls is `android` for 954 # both Android and Brillo builds. 955 kwargs['os_type'] = 'android' 956 else: 957 build_path = build 958 kwargs['build'] = build 959 call = self.build_call('locate_file', async=False, **kwargs) 960 try: 961 file_path = urllib2.urlopen(call).read() 962 return os.path.join(self.url(), 'static', build_path, file_path) 963 except httplib.BadStatusLine as e: 964 logging.error(e) 965 raise DevServerException('Received Bad Status line, Devserver %s ' 966 'might have gone down while handling ' 967 'the call: %s' % (self.url(), call)) 968 except error.TimeoutException: 969 error_message = ('Call `locate_file` timed out when looking for %s ' 970 'in artifacts %s in build %s' % 971 (file_name, artifacts, build_info)) 972 logging.error(error_message) 973 raise DevServerException(error_message) 974 975 976class ImageServer(ImageServerBase): 977 """Class for DevServer that handles RPCs related to CrOS images. 978 979 The calls to devserver to stage artifacts, including stage and download, are 980 made in async mode. That is, when caller makes an RPC |stage| to request 981 devserver to stage certain artifacts, devserver handles the call and starts 982 staging artifacts in a new thread, and return |Success| without waiting for 983 staging being completed. When caller receives message |Success|, it polls 984 devserver's is_staged call until all artifacts are staged. 985 Such mechanism is designed to prevent cherrypy threads in devserver being 986 running out, as staging artifacts might take long time, and cherrypy starts 987 with a fixed number of threads that handle devserver rpc. 988 """ 989 990 class ArtifactUrls(object): 991 """A container for URLs of staged artifacts. 992 993 Attributes: 994 full_payload: URL for downloading a staged full release update 995 mton_payload: URL for downloading a staged M-to-N release update 996 nton_payload: URL for downloading a staged N-to-N release update 997 998 """ 999 def __init__(self, full_payload=None, mton_payload=None, 1000 nton_payload=None): 1001 self.full_payload = full_payload 1002 self.mton_payload = mton_payload 1003 self.nton_payload = nton_payload 1004 1005 1006 def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''): 1007 """Polling devserver.is_staged until all artifacts are staged. 1008 1009 @param archive_url: Google Storage URL for the build. 1010 @param artifacts: Comma separated list of artifacts to download. 1011 @param files: Comma separated list of files to download. 1012 @return: True if all artifacts are staged in devserver. 1013 """ 1014 kwargs = {'archive_url': archive_url, 1015 'artifacts': artifacts, 1016 'files': files} 1017 return self._poll_is_staged(**kwargs) 1018 1019 1020 @remote_devserver_call() 1021 def call_and_wait(self, call_name, archive_url, artifacts, files, 1022 error_message, expected_response=SUCCESS): 1023 """Helper method to make a urlopen call, and wait for artifacts staged. 1024 1025 @param call_name: name of devserver rpc call. 1026 @param archive_url: Google Storage URL for the build.. 1027 @param artifacts: Comma separated list of artifacts to download. 1028 @param files: Comma separated list of files to download. 1029 @param expected_response: Expected response from rpc, default to 1030 |Success|. If it's set to None, do not compare 1031 the actual response. Any response is consider 1032 to be good. 1033 @param error_message: Error message to be thrown if response does not 1034 match expected_response. 1035 1036 @return: The response from rpc. 1037 @raise DevServerException upon any return code that's expected_response. 1038 1039 """ 1040 kwargs = {'archive_url': archive_url, 1041 'artifacts': artifacts, 1042 'files': files} 1043 return self._call_and_wait(call_name, error_message, 1044 expected_response, **kwargs) 1045 1046 1047 @remote_devserver_call() 1048 def stage_artifacts(self, image, artifacts=None, files='', 1049 archive_url=None): 1050 """Tell the devserver to download and stage |artifacts| from |image|. 1051 1052 This is the main call point for staging any specific artifacts for a 1053 given build. To see the list of artifacts one can stage see: 1054 1055 ~src/platfrom/dev/artifact_info.py. 1056 1057 This is maintained along with the actual devserver code. 1058 1059 @param image: the image to fetch and stage. 1060 @param artifacts: A list of artifacts. 1061 @param files: A list of files to stage. 1062 @param archive_url: Optional parameter that has the archive_url to stage 1063 this artifact from. Default is specified in autotest config + 1064 image. 1065 1066 @raise DevServerException upon any return code that's not HTTP OK. 1067 """ 1068 if not artifacts and not files: 1069 raise DevServerException('Must specify something to stage.') 1070 image = self.translate(image) 1071 self._stage_artifacts(image, artifacts, files, archive_url) 1072 1073 1074 @remote_devserver_call(timeout_min=0.5) 1075 def list_image_dir(self, image): 1076 """List the contents of the image stage directory, on the devserver. 1077 1078 @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>. 1079 1080 @raise DevServerException upon any return code that's not HTTP OK. 1081 """ 1082 image = self.translate(image) 1083 logging.info('Requesting contents from devserver %s for image %s', 1084 self.url(), image) 1085 archive_url = _get_storage_server_for_artifacts() + image 1086 call = self.build_call('list_image_dir', archive_url=archive_url) 1087 response = urllib2.urlopen(call) 1088 for line in [line.rstrip() for line in response]: 1089 logging.info(line) 1090 1091 1092 def trigger_download(self, image, synchronous=True): 1093 """Tell the devserver to download and stage |image|. 1094 1095 Tells the devserver to fetch |image| from the image storage server 1096 named by _get_image_storage_server(). 1097 1098 If |synchronous| is True, waits for the entire download to finish 1099 staging before returning. Otherwise only the artifacts necessary 1100 to start installing images onto DUT's will be staged before returning. 1101 A caller can then call finish_download to guarantee the rest of the 1102 artifacts have finished staging. 1103 1104 @param image: the image to fetch and stage. 1105 @param synchronous: if True, waits until all components of the image are 1106 staged before returning. 1107 1108 @raise DevServerException upon any return code that's not HTTP OK. 1109 1110 """ 1111 image = self.translate(image) 1112 artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE 1113 self._trigger_download(image, artifacts, files='', 1114 synchronous=synchronous) 1115 1116 1117 @remote_devserver_call() 1118 def setup_telemetry(self, build): 1119 """Tell the devserver to setup telemetry for this build. 1120 1121 The devserver will stage autotest and then extract the required files 1122 for telemetry. 1123 1124 @param build: the build to setup telemetry for. 1125 1126 @returns path on the devserver that telemetry is installed to. 1127 """ 1128 build = self.translate(build) 1129 archive_url = _get_image_storage_server() + build 1130 call = self.build_call('setup_telemetry', archive_url=archive_url) 1131 try: 1132 response = urllib2.urlopen(call).read() 1133 except httplib.BadStatusLine as e: 1134 logging.error(e) 1135 raise DevServerException('Received Bad Status line, Devserver %s ' 1136 'might have gone down while handling ' 1137 'the call: %s' % (self.url(), call)) 1138 return response 1139 1140 1141 def finish_download(self, image): 1142 """Tell the devserver to finish staging |image|. 1143 1144 If trigger_download is called with synchronous=False, it will return 1145 before all artifacts have been staged. This method contacts the 1146 devserver and blocks until all staging is completed and should be 1147 called after a call to trigger_download. 1148 1149 @param image: the image to fetch and stage. 1150 @raise DevServerException upon any return code that's not HTTP OK. 1151 """ 1152 image = self.translate(image) 1153 artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST 1154 self._finish_download(image, artifacts, files='') 1155 1156 1157 def get_update_url(self, image): 1158 """Returns the url that should be passed to the updater. 1159 1160 @param image: the image that was fetched. 1161 """ 1162 image = self.translate(image) 1163 url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', 1164 type=str) 1165 return (url_pattern % (self.url(), image)) 1166 1167 1168 def get_staged_file_url(self, filename, image): 1169 """Returns the url of a staged file for this image on the devserver.""" 1170 return '/'.join([self._get_image_url(image), filename]) 1171 1172 1173 def get_full_payload_url(self, image): 1174 """Returns a URL to a staged full payload. 1175 1176 @param image: the image that was fetched. 1177 1178 @return A fully qualified URL that can be used for downloading the 1179 payload. 1180 1181 """ 1182 return self._get_image_url(image) + '/update.gz' 1183 1184 1185 def get_test_image_url(self, image): 1186 """Returns a URL to a staged test image. 1187 1188 @param image: the image that was fetched. 1189 1190 @return A fully qualified URL that can be used for downloading the 1191 image. 1192 1193 """ 1194 return self._get_image_url(image) + '/chromiumos_test_image.bin' 1195 1196 1197 @remote_devserver_call() 1198 def list_control_files(self, build, suite_name=''): 1199 """Ask the devserver to list all control files for |build|. 1200 1201 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 1202 whose control files the caller wants listed. 1203 @param suite_name: The name of the suite for which we require control 1204 files. 1205 @return None on failure, or a list of control file paths 1206 (e.g. server/site_tests/autoupdate/control) 1207 @raise DevServerException upon any return code that's not HTTP OK. 1208 """ 1209 build = self.translate(build) 1210 call = self.build_call('controlfiles', build=build, 1211 suite_name=suite_name) 1212 response = urllib2.urlopen(call) 1213 return [line.rstrip() for line in response] 1214 1215 1216 @remote_devserver_call() 1217 def get_control_file(self, build, control_path): 1218 """Ask the devserver for the contents of a control file. 1219 1220 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 1221 whose control file the caller wants to fetch. 1222 @param control_path: The file to fetch 1223 (e.g. server/site_tests/autoupdate/control) 1224 @return The contents of the desired file. 1225 @raise DevServerException upon any return code that's not HTTP OK. 1226 """ 1227 build = self.translate(build) 1228 call = self.build_call('controlfiles', build=build, 1229 control_path=control_path) 1230 return urllib2.urlopen(call).read() 1231 1232 1233 @remote_devserver_call() 1234 def get_dependencies_file(self, build): 1235 """Ask the dev server for the contents of the suite dependencies file. 1236 1237 Ask the dev server at |self._dev_server| for the contents of the 1238 pre-processed suite dependencies file (at DEPENDENCIES_FILE) 1239 for |build|. 1240 1241 @param build: The build (e.g. x86-mario-release/R21-2333.0.0) 1242 whose dependencies the caller is interested in. 1243 @return The contents of the dependencies file, which should eval to 1244 a dict of dicts, as per site_utils/suite_preprocessor.py. 1245 @raise DevServerException upon any return code that's not HTTP OK. 1246 """ 1247 build = self.translate(build) 1248 call = self.build_call('controlfiles', 1249 build=build, control_path=DEPENDENCIES_FILE) 1250 return urllib2.urlopen(call).read() 1251 1252 1253 @remote_devserver_call() 1254 def get_latest_build_in_gs(self, board): 1255 """Ask the devservers for the latest offical build in Google Storage. 1256 1257 @param board: The board for who we want the latest official build. 1258 @return A string of the returned build rambi-release/R37-5868.0.0 1259 @raise DevServerException upon any return code that's not HTTP OK. 1260 """ 1261 call = self.build_call( 1262 'xbuddy_translate/remote/%s/latest-official' % board, 1263 image_dir=_get_image_storage_server()) 1264 image_name = urllib2.urlopen(call).read() 1265 return os.path.dirname(image_name) 1266 1267 1268 def translate(self, build_name): 1269 """Translate the build name if it's in LATEST format. 1270 1271 If the build name is in the format [builder]/LATEST, return the latest 1272 build in Google Storage otherwise return the build name as is. 1273 1274 @param build_name: build_name to check. 1275 1276 @return The actual build name to use. 1277 """ 1278 match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name) 1279 if not match: 1280 return build_name 1281 translated_build = self.get_latest_build_in_gs(match.groups()[0]) 1282 logging.debug('Translated relative build %s to %s', build_name, 1283 translated_build) 1284 return translated_build 1285 1286 1287 @classmethod 1288 @remote_devserver_call() 1289 def get_latest_build(cls, target, milestone=''): 1290 """Ask all the devservers for the latest build for a given target. 1291 1292 @param target: The build target, typically a combination of the board 1293 and the type of build e.g. x86-mario-release. 1294 @param milestone: For latest build set to '', for builds only in a 1295 specific milestone set to a str of format Rxx 1296 (e.g. R16). Default: ''. Since we are dealing with a 1297 webserver sending an empty string, '', ensures that 1298 the variable in the URL is ignored as if it was set 1299 to None. 1300 @return A string of the returned build e.g. R20-2226.0.0. 1301 @raise DevServerException upon any return code that's not HTTP OK. 1302 """ 1303 calls = cls.build_all_calls('latestbuild', target=target, 1304 milestone=milestone) 1305 latest_builds = [] 1306 for call in calls: 1307 latest_builds.append(urllib2.urlopen(call).read()) 1308 1309 return max(latest_builds, key=version.LooseVersion) 1310 1311 1312class AndroidBuildServer(ImageServerBase): 1313 """Class for DevServer that handles RPCs related to Android builds. 1314 1315 The calls to devserver to stage artifacts, including stage and download, are 1316 made in async mode. That is, when caller makes an RPC |stage| to request 1317 devserver to stage certain artifacts, devserver handles the call and starts 1318 staging artifacts in a new thread, and return |Success| without waiting for 1319 staging being completed. When caller receives message |Success|, it polls 1320 devserver's is_staged call until all artifacts are staged. 1321 Such mechanism is designed to prevent cherrypy threads in devserver being 1322 running out, as staging artifacts might take long time, and cherrypy starts 1323 with a fixed number of threads that handle devserver rpc. 1324 """ 1325 1326 def wait_for_artifacts_staged(self, target, build_id, branch, 1327 archive_url=None, artifacts='', files=''): 1328 """Polling devserver.is_staged until all artifacts are staged. 1329 1330 @param target: Target of the android build to stage, e.g., 1331 shamu-userdebug. 1332 @param build_id: Build id of the android build to stage. 1333 @param branch: Branch of the android build to stage. 1334 @param archive_url: Google Storage URL for the build. 1335 @param artifacts: Comma separated list of artifacts to download. 1336 @param files: Comma separated list of files to download. 1337 1338 @return: True if all artifacts are staged in devserver. 1339 """ 1340 kwargs = {'target': target, 1341 'build_id': build_id, 1342 'branch': branch, 1343 'artifacts': artifacts, 1344 'files': files, 1345 'os_type': 'android'} 1346 if archive_url: 1347 kwargs['archive_url'] = archive_url 1348 return self._poll_is_staged(**kwargs) 1349 1350 1351 @remote_devserver_call() 1352 def call_and_wait(self, call_name, target, build_id, branch, archive_url, 1353 artifacts, files, error_message, 1354 expected_response=SUCCESS): 1355 """Helper method to make a urlopen call, and wait for artifacts staged. 1356 1357 @param call_name: name of devserver rpc call. 1358 @param target: Target of the android build to stage, e.g., 1359 shamu-userdebug. 1360 @param build_id: Build id of the android build to stage. 1361 @param branch: Branch of the android build to stage. 1362 @param archive_url: Google Storage URL for the CrOS build. 1363 @param artifacts: Comma separated list of artifacts to download. 1364 @param files: Comma separated list of files to download. 1365 @param expected_response: Expected response from rpc, default to 1366 |Success|. If it's set to None, do not compare 1367 the actual response. Any response is consider 1368 to be good. 1369 @param error_message: Error message to be thrown if response does not 1370 match expected_response. 1371 1372 @return: The response from rpc. 1373 @raise DevServerException upon any return code that's expected_response. 1374 1375 """ 1376 kwargs = {'target': target, 1377 'build_id': build_id, 1378 'branch': branch, 1379 'artifacts': artifacts, 1380 'files': files, 1381 'os_type': 'android'} 1382 if archive_url: 1383 kwargs['archive_url'] = archive_url 1384 return self._call_and_wait(call_name, error_message, expected_response, 1385 **kwargs) 1386 1387 1388 @remote_devserver_call() 1389 def stage_artifacts(self, target, build_id, branch, artifacts=None, 1390 files='', archive_url=None): 1391 """Tell the devserver to download and stage |artifacts| from |image|. 1392 1393 This is the main call point for staging any specific artifacts for a 1394 given build. To see the list of artifacts one can stage see: 1395 1396 ~src/platfrom/dev/artifact_info.py. 1397 1398 This is maintained along with the actual devserver code. 1399 1400 @param target: Target of the android build to stage, e.g., 1401 shamu-userdebug. 1402 @param build_id: Build id of the android build to stage. 1403 @param branch: Branch of the android build to stage. 1404 @param artifacts: A list of artifacts. 1405 @param files: A list of files to stage. 1406 @param archive_url: Optional parameter that has the archive_url to stage 1407 this artifact from. Default is specified in autotest config + 1408 image. 1409 1410 @raise DevServerException upon any return code that's not HTTP OK. 1411 """ 1412 android_build_info = {'target': target, 1413 'build_id': build_id, 1414 'branch': branch} 1415 if not artifacts and not files: 1416 raise DevServerException('Must specify something to stage.') 1417 if not all(android_build_info.values()): 1418 raise DevServerException( 1419 'To stage an Android build, must specify target, build id ' 1420 'and branch.') 1421 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1422 self._stage_artifacts(build, artifacts, files, archive_url, 1423 **android_build_info) 1424 1425 1426 def trigger_download(self, target, build_id, branch, artifacts=None, 1427 is_brillo=False, synchronous=True): 1428 """Tell the devserver to download and stage an Android build. 1429 1430 Tells the devserver to fetch an Android build from the image storage 1431 server named by _get_image_storage_server(). 1432 1433 If |synchronous| is True, waits for the entire download to finish 1434 staging before returning. Otherwise only the artifacts necessary 1435 to start installing images onto DUT's will be staged before returning. 1436 A caller can then call finish_download to guarantee the rest of the 1437 artifacts have finished staging. 1438 1439 @param target: Target of the android build to stage, e.g., 1440 shamu-userdebug. 1441 @param build_id: Build id of the android build to stage. 1442 @param branch: Branch of the android build to stage. 1443 @param artifacts: A string of artifacts separated by comma. If None, 1444 use the default artifacts for Android or Brillo build. 1445 @param is_brillo: Set to True if it's a Brillo build. Default is False. 1446 @param synchronous: if True, waits until all components of the image are 1447 staged before returning. 1448 1449 @raise DevServerException upon any return code that's not HTTP OK. 1450 1451 """ 1452 android_build_info = {'target': target, 1453 'build_id': build_id, 1454 'branch': branch} 1455 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1456 if not artifacts: 1457 artifacts = (_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE if is_brillo 1458 else _ANDROID_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE) 1459 self._trigger_download(build, artifacts, files='', 1460 synchronous=synchronous, **android_build_info) 1461 1462 1463 def finish_download(self, target, build_id, branch, is_brillo=False): 1464 """Tell the devserver to finish staging an Android build. 1465 1466 If trigger_download is called with synchronous=False, it will return 1467 before all artifacts have been staged. This method contacts the 1468 devserver and blocks until all staging is completed and should be 1469 called after a call to trigger_download. 1470 1471 @param target: Target of the android build to stage, e.g., 1472 shamu-userdebug. 1473 @param build_id: Build id of the android build to stage. 1474 @param branch: Branch of the android build to stage. 1475 @param is_brillo: Set to True if it's a Brillo build. Default is False. 1476 1477 @raise DevServerException upon any return code that's not HTTP OK. 1478 """ 1479 android_build_info = {'target': target, 1480 'build_id': build_id, 1481 'branch': branch} 1482 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1483 artifacts = (_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE if is_brillo else 1484 _ANDROID_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE) 1485 self._finish_download(build, artifacts, files='', **android_build_info) 1486 1487 1488 def get_staged_file_url(self, filename, target, build_id, branch): 1489 """Returns the url of a staged file for this image on the devserver. 1490 1491 @param filename: Name of the file. 1492 @param target: Target of the android build to stage, e.g., 1493 shamu-userdebug. 1494 @param build_id: Build id of the android build to stage. 1495 @param branch: Branch of the android build to stage. 1496 1497 @return: The url of a staged file for this image on the devserver. 1498 """ 1499 android_build_info = {'target': target, 1500 'build_id': build_id, 1501 'branch': branch, 1502 'os_type': 'android'} 1503 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1504 return '/'.join([self._get_image_url(build), filename]) 1505 1506 1507 def translate(self, build_name): 1508 """Translate the build name if it's in LATEST format. 1509 1510 If the build name is in the format [branch]/[target]/LATEST, return the 1511 latest build in Launch Control otherwise return the build name as is. 1512 1513 @param build_name: build_name to check. 1514 1515 @return The actual build name to use. 1516 """ 1517 branch, target, build_id = utils.parse_android_build(build_name) 1518 if build_id != 'LATEST': 1519 return build_name 1520 call = self.build_call('latestbuild', branch=branch, target=target, 1521 os_type='android') 1522 translated_build_id = urllib2.urlopen(call).read() 1523 translated_build = (ANDROID_BUILD_NAME_PATTERN % 1524 {'branch': branch, 1525 'target': target, 1526 'build_id': translated_build_id}) 1527 logging.debug('Translated relative build %s to %s', build_name, 1528 translated_build) 1529 return translated_build 1530 1531 1532def _is_load_healthy(load): 1533 """Check if devserver's load meets the minimum threshold. 1534 1535 @param load: The devserver's load stats to check. 1536 1537 @return: True if the load meets the minimum threshold. Return False 1538 otherwise. 1539 1540 """ 1541 # Threshold checks, including CPU load. 1542 if load[DevServer.CPU_LOAD] > DevServer.MAX_CPU_LOAD: 1543 logging.debug('CPU load of devserver %s is at %s%%, which is higher ' 1544 'than the threshold of %s%%', load['devserver'], 1545 load[DevServer.CPU_LOAD], DevServer.MAX_CPU_LOAD) 1546 return False 1547 if load[DevServer.NETWORK_IO] > DevServer.MAX_NETWORK_IO: 1548 logging.debug('Network IO of devserver %s is at %i Bps, which is ' 1549 'higher than the threshold of %i bytes per second.', 1550 load['devserver'], load[DevServer.NETWORK_IO], 1551 DevServer.MAX_NETWORK_IO) 1552 return False 1553 return True 1554 1555 1556def _compare_load(devserver1, devserver2): 1557 """Comparator function to compare load between two devservers. 1558 1559 @param devserver1: A dictionary of devserver load stats to be compared. 1560 @param devserver2: A dictionary of devserver load stats to be compared. 1561 1562 @return: Negative value if the load of `devserver1` is less than the load 1563 of `devserver2`. Return positive value otherwise. 1564 1565 """ 1566 return int(devserver1[DevServer.DISK_IO] - devserver2[DevServer.DISK_IO]) 1567 1568 1569def get_least_loaded_devserver(devserver_type=ImageServer): 1570 """Get the devserver with the least load. 1571 1572 Iterate through all devservers and get the one with least load. 1573 1574 TODO(crbug.com/486278): Devserver with required build already staged should 1575 take higher priority. This will need check_health call to be able to verify 1576 existence of a given build/artifact. Also, in case all devservers are 1577 overloaded, the logic here should fall back to the old behavior that randomly 1578 selects a devserver based on the hash of the image name/url. 1579 1580 @param devserver_type: Type of devserver to select from. Default is set to 1581 ImageServer. 1582 1583 @return: Name of the devserver with the least load. 1584 1585 """ 1586 # get_devserver_load call needs to be made in a new process to allow force 1587 # timeout using signal. 1588 output = multiprocessing.Queue() 1589 processes = [] 1590 for devserver in devserver_type.servers(): 1591 processes.append(multiprocessing.Process( 1592 target=DevServer.get_devserver_load_wrapper, 1593 args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output))) 1594 1595 for p in processes: 1596 p.start() 1597 for p in processes: 1598 p.join() 1599 loads = [output.get() for p in processes] 1600 # Filter out any load failed to be retrieved or does not support load check. 1601 loads = [load for load in loads if load and DevServer.CPU_LOAD in load and 1602 DevServer.is_free_disk_ok(load) and 1603 DevServer.is_apache_client_count_ok(load)] 1604 if not loads: 1605 logging.debug('Failed to retrieve load stats from any devserver. No ' 1606 'load balancing can be applied.') 1607 return None 1608 loads = [load for load in loads if _is_load_healthy(load)] 1609 if not loads: 1610 logging.error('No devserver has the capacity to be selected.') 1611 return None 1612 loads = sorted(loads, cmp=_compare_load) 1613 return loads[0]['devserver'] 1614