dev_server.py revision 58424779a23e853a64562a66ccbbcbbeea3d1209
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from distutils import version
6import cStringIO
7import HTMLParser
8import httplib
9import json
10import logging
11import multiprocessing
12import os
13import re
14import socket
15import time
16import urllib2
17import urlparse
18
19from autotest_lib.client.bin import utils as site_utils
20from autotest_lib.client.common_lib import android_utils
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import utils
24from autotest_lib.client.common_lib.cros import retry
25from autotest_lib.client.common_lib.cros.graphite import autotest_stats
26# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107
27
28
# Shared handle on the global autotest configuration.
CONFIG = global_config.global_config
# This file is generated at build time and specifies, per suite and per test,
# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
# }
DEPENDENCIES_FILE = 'test_suites/dependency_info'
# Number of seconds between two polls of the devserver's is_staged call when
# a caller checks whether artifacts are staged.
_ARTIFACT_STAGE_POLLING_INTERVAL = 5
# Artifacts that should be staged when client calls devserver RPC to stage an
# image.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
# Artifacts that should be staged when client calls devserver RPC to stage an
# image with autotest artifact.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
                                                   'control_files,stateful,'
                                                   'autotest_packages')
# Artifacts that should be staged when client calls devserver RPC to stage an
# Android build.
# NOTE(review): the constant is named after Brillo while the comment says
# Android -- confirm which builds this list applies to.
_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = ('zip_images,vendor_partitions')
# When True, the free-disk and Apache-client-count checks of a devserver are
# skipped and always report success.
SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
        'CROS', 'skip_devserver_health_check', type=bool)
# Number of seconds for the call to get devserver load to time out.
TIMEOUT_GET_DEVSERVER_LOAD = 2.0

# Android artifact path in devserver. Backslashes in the configured value are
# removed.
ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')

# Return value from a devserver RPC indicating the call succeeded.
SUCCESS = 'Success'

# Timeout, in minutes, for a single devserver ssh call. Also the default
# timeout of the devserver load and health queries.
DEVSERVER_SSH_TIMEOUT_MINS = 1

# Timeout, in minutes, for waiting on a devserver to finish staging. Also the
# default retry timeout of the remote_devserver_call decorator.
DEVSERVER_IS_STAGING_RETRY_MIN = 100

# Timeout, in minutes, for waiting on a DUT auto-update to finish.
DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN = 100

# Total number of times a devserver triggers a CrOS auto-update.
AU_RETRY_LIMIT = 2

# Number of seconds for caller to poll devserver's get_au_status call to
# check if cros auto-update is finished.
CROS_AU_POLLING_INTERVAL = 10

# Number of seconds for intervals between retrying auto-update calls.
CROS_AU_RETRY_INTERVAL = 20

# When True, a devserver in the same subnet as the DUT is preferred for DUTs
# outside of any restricted subnet.
PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'prefer_local_devserver', type=bool, default=False)

# When True, image server calls are issued with curl over ssh instead of a
# direct HTTP request.
ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
        default=False)

# Directory to save auto-update logs
AUTO_UPDATE_LOG_DIR = 'autoupdate_logs'

# Default number of subnet mask bits used when looking for devservers in the
# same subnet as a DUT.
DEFAULT_SUBNET_MASKBIT = 19

# Timer used to decorate devserver calls whose duration is reported as stats.
_timer = autotest_stats.Timer('devserver')
96
97
class DevServerException(Exception):
    """Error reported for a failed dev server interaction.

    Raised when the dev server returns a non-200 HTTP response.
    """
101
102
class MarkupStripper(HTMLParser.HTMLParser):
    """HTML parser that strips HTML tags and coded characters like &amp;.

    Works by, basically, not doing anything for any tags, and only recording
    the content of text nodes in an internal list.
    """
    def __init__(self):
        # Run the real base-class initializer instead of calling reset() by
        # hand, so any state the parser sets up in __init__ is present before
        # feed() is used. It is called explicitly rather than through super()
        # because HTMLParser is an old-style class in Python 2.
        HTMLParser.HTMLParser.__init__(self)
        # Text fragments collected so far, in document order.
        self.fed = []


    def handle_data(self, d):
        """Consume content of text nodes, store it away.

        @param d: The text of one text node.
        """
        self.fed.append(d)


    def get_data(self):
        """Concatenate and return all stored data.

        @return: A single string made of every text node seen so far.
        """
        return ''.join(self.fed)
122
123
def _strip_http_message(message):
    """Strip the HTML markup from an HTTP message.

    @param message: A string returned by an HTTP call.

    @return: The text of |message| with all markup removed.
    """
    stripper = MarkupStripper()
    try:
        # NOTE(review): utf_32 looks unusual for an HTTP body; when decoding
        # fails the raw message is parsed instead -- confirm the codec.
        decoded = message.decode('utf_32')
        stripper.feed(decoded)
    except UnicodeDecodeError:
        stripper.feed(message)
    return stripper.get_data()
137
138
def _get_image_storage_server():
    """Get the image storage server set in the global config.

    @return: The value of image_storage_server in the CROS config section.
    """
    return CONFIG.get_config_value('CROS', 'image_storage_server', type=str)
141
142
def _get_canary_channel_server():
    """
    Get the url of the canary-channel server,
    e.g.: gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The url of the canary channel server, i.e. the value of
             canary_channel_server in the CROS config section.
    """
    return CONFIG.get_config_value('CROS', 'canary_channel_server', type=str)
151
152
def _get_storage_server_for_artifacts(artifacts=None):
    """Pick the storage server that holds the given artifacts.

    The canary channel server is chosen only when a factory artifact is
    configured and is one of the requested artifacts.

    @param artifacts: A list of artifacts we need to stage.
    @return: The address of the storage server that has these artifacts.
             The default image storage server if no artifacts are specified.
    """
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if (not artifacts or not factory_artifact
            or factory_artifact not in artifacts):
        return _get_image_storage_server()
    return _get_canary_channel_server()
165
166
def _get_dev_server_list():
    """Get the devservers set in the global config.

    @return: The list stored as dev_server in the CROS config section, or an
             empty list if it is not set.
    """
    return CONFIG.get_config_value('CROS', 'dev_server', type=list, default=[])
169
170
def _get_crash_server_list():
    """Get the crash servers set in the global config.

    @return: The list stored as crash_server in the CROS config section, or
             an empty list if it is not set.
    """
    return CONFIG.get_config_value('CROS', 'crash_server', type=list,
        default=[])
174
175
def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """A decorator to use with remote devserver calls.

    This decorator converts urllib2.HTTPErrors into DevServerExceptions
    with any embedded error info converted into plain text. The method
    retries on urllib2.URLError or error.CmdError to avoid devserver flakiness.

    @param timeout_min: Forwarded to retry.retry as timeout_min; presumably
                        the number of minutes to keep retrying -- confirm
                        against retry.retry.
    @param exception_to_raise: Forwarded to retry.retry as
                               exception_to_raise; presumably the exception
                               raised once retrying gives up -- confirm
                               against retry.retry.

    @return: A decorator to apply to the method making the devserver call.
    """
    #pylint: disable=C0111
    def inner_decorator(method):

        @retry.retry((urllib2.URLError, error.CmdError),
                     timeout_min=timeout_min,
                     exception_to_raise=exception_to_raise)
        def wrapper(*args, **kwargs):
            """This wrapper actually catches the HTTPError."""
            try:
                return method(*args, **kwargs)
            except urllib2.HTTPError as e:
                # The body of the error response carries the devserver's
                # error page; reduce it to plain text for the exception.
                error_markup = e.read()
                raise DevServerException(_strip_http_message(error_markup))

        return wrapper

    return inner_decorator
201
202
class DevServer(object):
    """Base class for all DevServer-like server stubs.

    This is the base class for interacting with all Dev Server-like servers.
    A caller should obtain an instance of a sub-class of DevServer with:

    server = SubClassServer.resolve(build)

    or, when the url of the server is already known, with:

    server = SubClassServer(url)
    """
    # Minimum free disk space (in GB, per the name) that is_free_disk_ok
    # requires of a devserver.
    _MIN_FREE_DISK_SPACE_GB = 20
    # Maximum number of live Apache clients that is_apache_client_count_ok
    # tolerates on a devserver.
    _MAX_APACHE_CLIENT_COUNT = 75
    # Threshold for the CPU load percentage for a devserver to be selected.
    MAX_CPU_LOAD = 80.0
    # Threshold for the network IO, set to 80MB/s
    MAX_NETWORK_IO = 1024 * 1024 * 80
    # FREE_DISK and APACHE_CLIENT_COUNT are the keys looked up in the load
    # dictionary of a devserver by the health checks; the other names are
    # presumably keys of the same dictionary -- TODO confirm.
    DISK_IO = 'disk_total_bytes_per_second'
    NETWORK_IO = 'network_total_bytes_per_second'
    CPU_LOAD = 'cpu_percent'
    FREE_DISK = 'free_disk'
    STAGING_THREAD_COUNT = 'staging_thread_count'
    APACHE_CLIENT_COUNT = 'apache_client_count'
224
225
    def __init__(self, devserver):
        """Create a stub for one devserver.

        @param devserver: The url of the devserver, e.g.,
                          http://127.0.0.10:8080
        """
        self._devserver = devserver
228
229
    def url(self):
        """Returns the url for this devserver.

        @return: The url this object was constructed with.
        """
        return self._devserver
233
234
235    @staticmethod
236    def get_server_name(url):
237        """Strip the http:// prefix and port from a url.
238
239        @param url: A url of a server.
240
241        @return the server name without http:// prefix and port.
242
243        """
244        return urlparse.urlparse(url).hostname
245
246
247    @staticmethod
248    def get_server_url(url):
249        """Get the devserver url from a repo url, which includes build info.
250
251        @param url: A job repo url.
252
253        @return A devserver url, e.g., http://127.0.0.10:8080
254        """
255        res = urlparse.urlparse(url)
256        if res.netloc:
257            return res.scheme + '://' + res.netloc
258
259
260    @classmethod
261    def get_devserver_load_wrapper(cls, devserver, timeout_sec, output):
262        """A wrapper function to call get_devserver_load in parallel.
263
264        @param devserver: url of the devserver.
265        @param timeout_sec: Number of seconds before time out the devserver
266                            call.
267        @param output: An output queue to save results to.
268        """
269        load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0)
270        if load:
271            load['devserver'] = devserver
272        output.put(load)
273
274
275    @classmethod
276    def get_devserver_load(cls, devserver,
277                           timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
278        """Returns True if the |devserver| is healthy to stage build.
279
280        @param devserver: url of the devserver.
281        @param timeout_min: How long to wait in minutes before deciding the
282                            the devserver is not up (float).
283
284        @return: A dictionary of the devserver's load.
285
286        """
287        server_name = DevServer.get_server_name(devserver)
288        # statsd treats |.| as path separator.
289        server_name = server_name.replace('.', '_')
290        call = DevServer._build_call(devserver, 'check_health')
291
292        @remote_devserver_call(timeout_min=timeout_min)
293        def make_call():
294            """Inner method that makes the call."""
295            return cls.run_call(call, timeout=timeout_min*60)
296        try:
297            result_dict = json.load(cStringIO.StringIO(make_call()))
298            for key, val in result_dict.iteritems():
299                try:
300                    autotest_stats.Gauge(server_name).send(key, float(val))
301                except ValueError:
302                    # Ignore all non-numerical health data.
303                    pass
304
305            return result_dict
306        except Exception as e:
307            logging.error('Devserver call failed: "%s", timeout: %s seconds,'
308                          ' Error: %s', call, timeout_min * 60, e)
309
310
311    @staticmethod
312    def is_free_disk_ok(load):
313        """Check if a devserver has enough free disk.
314
315        @param load: A dict of the load of the devserver.
316
317        @return: True if the devserver has enough free disk or disk check is
318                 skipped in global config.
319
320        """
321        if SKIP_DEVSERVER_HEALTH_CHECK:
322            logging.debug('devserver health check is skipped.')
323        elif load[DevServer.FREE_DISK] < DevServer._MIN_FREE_DISK_SPACE_GB:
324            return False
325
326        return True
327
328
329    @staticmethod
330    def is_apache_client_count_ok(load):
331        """Check if a devserver has enough Apache connections available.
332
333        Apache server by default has maximum of 150 concurrent connections. If
334        a devserver has too many live connections, it likely indicates the
335        server is busy handling many long running download requests, e.g.,
336        downloading stateful partitions. It is better not to add more requests
337        to it.
338
339        @param load: A dict of the load of the devserver.
340
341        @return: True if the devserver has enough Apache connections available,
342                 or disk check is skipped in global config.
343
344        """
345        if SKIP_DEVSERVER_HEALTH_CHECK:
346            logging.debug('devserver health check is skipped.')
347        elif DevServer.APACHE_CLIENT_COUNT not in load:
348            logging.debug('Apache client count is not collected from devserver.')
349        elif (load[DevServer.APACHE_CLIENT_COUNT] >
350              DevServer._MAX_APACHE_CLIENT_COUNT):
351            return False
352
353        return True
354
355
356    @classmethod
357    @_timer.decorate
358    def devserver_healthy(cls, devserver,
359                          timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
360        """Returns True if the |devserver| is healthy to stage build.
361
362        @param devserver: url of the devserver.
363        @param timeout_min: How long to wait in minutes before deciding the
364                            the devserver is not up (float).
365
366        @return: True if devserver is healthy. Return False otherwise.
367
368        """
369        server_name = DevServer.get_server_name(devserver)
370        # statsd treats |.| as path separator.
371        server_name = server_name.replace('.', '_')
372        load = cls.get_devserver_load(devserver, timeout_min=timeout_min)
373        if not load:
374            # Failed to get the load of devserver.
375            autotest_stats.Counter(server_name +
376                                   '.devserver_not_healthy').increment()
377            return False
378
379        apache_ok = DevServer.is_apache_client_count_ok(load)
380        if not apache_ok:
381            logging.error('Devserver check_health failed. Live Apache client '
382                          'count is too high: %d.',
383                          load[DevServer.APACHE_CLIENT_COUNT])
384            autotest_stats.Counter(server_name +
385                                   '.devserver_not_healthy').increment()
386            return False
387
388        disk_ok = DevServer.is_free_disk_ok(load)
389        if not disk_ok:
390            logging.error('Devserver check_health failed. Free disk space is '
391                          'low. Only %dGB is available.',
392                          load[DevServer.FREE_DISK])
393        counter = '.devserver_healthy' if disk_ok else '.devserver_not_healthy'
394        # This counter indicates the load of a devserver. By comparing the
395        # value of this counter for all devservers, we can evaluate the
396        # load balancing across all devservers.
397        autotest_stats.Counter(server_name + counter).increment()
398        return disk_ok
399
400
401    @staticmethod
402    def _build_call(host, method, **kwargs):
403        """Build a URL to |host| that calls |method|, passing |kwargs|.
404
405        Builds a URL that calls |method| on the dev server defined by |host|,
406        passing a set of key/value pairs built from the dict |kwargs|.
407
408        @param host: a string that is the host basename e.g. http://server:90.
409        @param method: the dev server method to call.
410        @param kwargs: a dict mapping arg names to arg values.
411        @return the URL string.
412        """
413        argstr = '&'.join(map(lambda x: "%s=%s" % x, kwargs.iteritems()))
414        return "%(host)s/%(method)s?%(argstr)s" % dict(
415                host=host, method=method, argstr=argstr)
416
417
418    def build_call(self, method, **kwargs):
419        """Builds a devserver RPC string that is used by 'run_call()'.
420
421        @param method: remote devserver method to call.
422        """
423        return self._build_call(self._devserver, method, **kwargs)
424
425
426    @classmethod
427    def build_all_calls(cls, method, **kwargs):
428        """Builds a list of URLs that makes RPC calls on all devservers.
429
430        Build a URL that calls |method| on the dev server, passing a set
431        of key/value pairs built from the dict |kwargs|.
432
433        @param method: the dev server method to call.
434        @param kwargs: a dict mapping arg names to arg values
435
436        @return the URL string
437        """
438        calls = []
439        # Note we use cls.servers as servers is class specific.
440        for server in cls.servers():
441            if cls.devserver_healthy(server):
442                calls.append(cls._build_call(server, method, **kwargs))
443
444        return calls
445
446
447    @classmethod
448    def run_call(cls, call, readline=False, timeout=None):
449        """Invoke a given devserver call using urllib.open.
450
451        Open the URL with HTTP, and return the text of the response. Exceptions
452        may be raised as for urllib2.urlopen().
453
454        @param call: a url string that calls a method to a devserver.
455        @param readline: whether read http response line by line.
456        @param timeout: The timeout seconds for this urlopen call.
457
458        @return the results of this call.
459        """
460        if timeout is not None:
461            return utils.urlopen_socket_timeout(
462                    call, timeout=timeout).read()
463        elif readline:
464            response = urllib2.urlopen(call)
465            return [line.rstrip() for line in response]
466        else:
467            return urllib2.urlopen(call).read()
468
469
    @staticmethod
    def servers():
        """Returns a list of servers that can serve as this type of server.

        Sub-classes must override this method.

        @raise NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
474
475
476    @classmethod
477    def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT,
478                                      unrestricted_only=False):
479        """Get the devservers in the same subnet of the given ip.
480
481        @param ip: The IP address of a dut to look for devserver.
482        @param mask_bits: Number of mask bits. Default is 19.
483        @param unrestricted_only: Set to True to select from devserver in
484                unrestricted subnet only. Default is False.
485
486        @return: A list of devservers in the same subnet of the given ip.
487
488        """
489        # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so
490        # we need a dict to return the full devserver path once the IPs are
491        # filtered in get_servers_in_same_subnet.
492        server_names = {}
493        all_devservers = []
494        devservers = (cls.get_unrestricted_devservers() if unrestricted_only
495                      else cls.servers())
496        for server in devservers:
497            server_name = cls.get_server_name(server)
498            server_names[server_name] = server
499            all_devservers.append(server_name)
500        devservers = utils.get_servers_in_same_subnet(ip, mask_bits,
501                                                      all_devservers)
502        return [server_names[s] for s in devservers]
503
504
505    @classmethod
506    def get_unrestricted_devservers(
507                cls, restricted_subnets=utils.RESTRICTED_SUBNETS):
508        """Get the devservers not in any restricted subnet specified in
509        restricted_subnets.
510
511        @param restricted_subnets: A list of restriected subnets.
512
513        @return: A list of devservers not in any restricted subnet.
514
515        """
516        if not restricted_subnets:
517            return cls.servers()
518
519        devservers = []
520        for server in cls.servers():
521            server_name = cls.get_server_name(server)
522            if not utils.get_restricted_subnet(server_name, restricted_subnets):
523                devservers.append(server)
524        return devservers
525
526
527    @classmethod
528    def get_healthy_devserver(cls, build, devservers):
529        """"Get a healthy devserver instance from the list of devservers.
530
531        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
532
533        @return: A DevServer object of a healthy devserver. Return None if no
534                 healthy devserver is found.
535
536        """
537        while devservers:
538            hash_index = hash(build) % len(devservers)
539            devserver = devservers.pop(hash_index)
540            if cls.devserver_healthy(devserver):
541                return cls(devserver)
542
543
544    @classmethod
545    def get_available_devservers(cls, hostname=None,
546                                 prefer_local_devserver=PREFER_LOCAL_DEVSERVER,
547                                 restricted_subnets=utils.RESTRICTED_SUBNETS):
548        """Get devservers in the same subnet of the given hostname.
549
550        @param hostname: Hostname of a DUT to choose devserver for.
551
552        @return: A tuple of (devservers, can_retry), devservers is a list of
553                 devservers that's available for the given hostname. can_retry
554                 is a flag that indicate if caller can retry the selection of
555                 devserver if no devserver in the returned devservers can be
556                 used. For example, if hostname is in a restricted subnet,
557                 can_retry will be False.
558        """
559        host_ip = None
560        if hostname:
561            host_ip = site_utils.get_ip_address(hostname)
562            if not host_ip:
563                logging.error('Failed to get IP address of %s. Will pick a '
564                              'devserver without subnet constraint.', hostname)
565
566        if not host_ip:
567            return cls.get_unrestricted_devservers(restricted_subnets), False
568
569        # Go through all restricted subnet settings and check if the DUT is
570        # inside a restricted subnet. If so, only return the devservers in the
571        # restricted subnet and doesn't allow retry.
572        if host_ip and restricted_subnets:
573            for subnet_ip, mask_bits in restricted_subnets:
574                if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits):
575                    logging.debug('The host %s (%s) is in a restricted subnet. '
576                                  'Try to locate a devserver inside subnet '
577                                  '%s:%d.', hostname, host_ip, subnet_ip,
578                                  mask_bits)
579                    devservers = cls.get_devservers_in_same_subnet(
580                            subnet_ip, mask_bits)
581                    return devservers, False
582
583        # If prefer_local_devserver is set to True and the host is not in
584        # restricted subnet, pick a devserver in the same subnet if possible.
585        # Set can_retry to True so it can pick a different devserver if all
586        # devservers in the same subnet are down.
587        if prefer_local_devserver:
588            return (cls.get_devservers_in_same_subnet(
589                    host_ip, DEFAULT_SUBNET_MASKBIT, True), True)
590
591        return cls.get_unrestricted_devservers(restricted_subnets), False
592
593
594    @classmethod
595    def resolve(cls, build, hostname=None):
596        """"Resolves a build to a devserver instance.
597
598        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
599        @param hostname: The hostname of dut that requests a devserver. It's
600                         used to make sure a devserver in the same subnet is
601                         preferred.
602
603        @raise DevServerException: If no devserver is available.
604        """
605        tried_devservers = set()
606        devservers, can_retry = cls.get_available_devservers(hostname)
607        if devservers:
608            tried_devservers |= set(devservers)
609
610        devserver = cls.get_healthy_devserver(build, devservers)
611
612        if not devserver and can_retry:
613            # Find available devservers without dut location constrain.
614            devservers, _ = cls.get_available_devservers()
615            devserver = cls.get_healthy_devserver(build, devservers)
616            if devservers:
617                tried_devservers |= set(devservers)
618        if devserver:
619            return devserver
620        else:
621            error_msg = ('All devservers are currently down: %s. '
622                         'dut hostname: %s' %
623                         (tried_devservers, hostname))
624            logging.error(error_msg)
625            raise DevServerException(error_msg)
626
627
628    @classmethod
629    def random(cls):
630        """Return a random devserver that's available.
631
632        Devserver election in `resolve` method is based on a hash of the
633        build that a caller wants to stage. The purpose is that different
634        callers requesting for the same build can get the same devserver,
635        while the lab is able to distribute different builds across all
636        devservers. That helps to reduce the duplication of builds across
637        all devservers.
638        This function returns a random devserver, by passing a random
639        pseudo build name to `resolve `method.
640        """
641        return cls.resolve(build=str(time.time()))
642
643
class CrashServer(DevServer):
    """Class of DevServer that symbolicates crash dumps."""

    @staticmethod
    def servers():
        """Returns the list of crash servers set in the global config."""
        return _get_crash_server_list()


    @remote_devserver_call()
    def symbolicate_dump(self, minidump_path, build):
        """Ask the devserver to symbolicate the dump at minidump_path.

        Posts the minidump to the symbolicate_dump call of the devserver,
        along with the archive url of |build|. The call is counted and, when
        it succeeds, timed in stats named after the devserver.

        @param minidump_path: the on-disk path of the minidump.
        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                      whose debug symbols are needed for symbolication.
        @return The contents of the stack trace, or an empty string if the
                requests module can't be imported.
        @raise DevServerException upon any return code that's not HTTP OK.
        """
        try:
            import requests
        except ImportError:
            logging.warning("Can't 'import requests' to connect to dev server.")
            return ''
        server_name = self.get_server_name(self.url())
        # statsd treats |.| as path separator.
        server_name = server_name.replace('.', '_')
        stats_key = 'CrashServer.%s.symbolicate_dump' % server_name
        autotest_stats.Counter(stats_key).increment()
        timer = autotest_stats.Timer(stats_key)
        timer.start()
        # Symbolicate minidump.
        call = self.build_call('symbolicate_dump',
                               archive_url=_get_image_storage_server() + build)
        # Close the minidump as soon as it is uploaded, whatever the outcome,
        # instead of leaking the file handle.
        with open(minidump_path, 'rb') as minidump:
            request = requests.post(call, files={'minidump': minidump})
        if request.status_code == requests.codes.OK:
            timer.stop()
            return request.text

        # Report the failure as an HTTPError so that the decorator turns it
        # into a DevServerException carrying the text of the response.
        error_fd = cStringIO.StringIO(request.text)
        raise urllib2.HTTPError(
                call, request.status_code, request.text, request.headers,
                error_fd)


    @classmethod
    def get_available_devservers(cls, hostname=None):
        """Get all available crash servers.

        Crash server election doesn't need to count the location of hostname.

        @param hostname: Hostname of a DUT to choose devserver for. Ignored;
                         optional as in the base class.

        @return: A tuple of (all crash servers, False). can_retry is set to
                 False, as all crash servers are returned. There is no point to
                 retry.
        """
        return cls.servers(), False
704
705
706class ImageServerBase(DevServer):
707    """Base class for devservers used to stage builds.
708
709    CrOS and Android builds are staged in different ways as they have different
710    sets of artifacts. This base class abstracts the shared functions between
711    the two types of ImageServer.
712    """
713
    @classmethod
    def servers(cls):
        """Returns a list of servers that can serve as a desired type of
        devserver.

        @return: The devservers set in the global config.
        """
        return _get_dev_server_list()
720
721
722    def _get_image_url(self, image):
723        """Returns the url of the directory for this image on the devserver.
724
725        @param image: the image that was fetched.
726        """
727        image = self.translate(image)
728        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
729                                              type=str)
730        return (url_pattern % (self.url(), image)).replace('update', 'static')
731
732
733    @staticmethod
734    def create_stats_str(subname, server_name, artifacts):
735        """Create a graphite name given the staged items.
736
737        The resulting name will look like
738            'dev_server.subname.DEVSERVER_URL.artifact1_artifact2'
739        The name can be used to create a stats object like
740        stats.Timer, stats.Counter, etc.
741
742        @param subname: A name for the graphite sub path.
743        @param server_name: name of the devserver, e.g 172.22.33.44.
744        @param artifacts: A list of artifacts.
745
746        @return A name described above.
747
748        """
749        staged_items = sorted(artifacts) if artifacts else []
750        staged_items_str = '_'.join(staged_items).replace(
751                '.', '_') if staged_items else None
752        server_name = server_name.replace('.', '_')
753        stats_str = 'dev_server.%s.%s' % (subname, server_name)
754        if staged_items_str:
755            stats_str += '.%s' % staged_items_str
756        return stats_str
757
758
759    @staticmethod
760    def create_metadata(server_name, image, artifacts=None, files=None):
761        """Create a metadata dictionary given the staged items.
762
763        The metadata can be send to metadata db along with stats.
764
765        @param server_name: name of the devserver, e.g 172.22.33.44.
766        @param image: The name of the image.
767        @param artifacts: A list of artifacts.
768        @param files: A list of files.
769
770        @return A metadata dictionary.
771
772        """
773        metadata = {'devserver': server_name,
774                    'image': image,
775                    '_type': 'devserver'}
776        if artifacts:
777            metadata['artifacts'] = ' '.join(artifacts)
778        if files:
779            metadata['files'] = ' '.join(files)
780        return metadata
781
782
783    @classmethod
784    def run_ssh_call(cls, call, readline=False, timeout=None):
785        """Construct an ssh-based rpc call, and execute it.
786
787        @param call: a url string that calls a method to a devserver.
788        @param readline: whether read http response line by line.
789        @param timeout: The timeout seconds for ssh call.
790
791        @return the results of this call.
792        """
793        hostname = urlparse.urlparse(call).hostname
794        ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call))
795        timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60
796        try:
797            result = utils.run(ssh_call, timeout=timeout_seconds)
798        except error.CmdError as e:
799            logging.debug('Error occurred with exit_code %d when executing the '
800                          'ssh call: %s.', e.result_obj.exit_status,
801                          e.result_obj.stderr)
802            stats_str = 'dev_server.%s.%s' % (hostname.replace('.', '_'),
803                                              'ssh_dev_server_failure')
804            autotest_stats.Counter(stats_str).increment()
805            raise
806        response = result.stdout
807
808        # If the curl command's returned HTTP response contains certain
809        # exception string, raise the DevServerException of the response.
810        if 'DownloaderException' in response:
811            raise DevServerException(_strip_http_message(response))
812
813        if readline:
814            # Remove line terminators and trailing whitespace
815            response = response.splitlines()
816            return [line.rstrip() for line in response]
817
818        return response
819
820
821    @classmethod
822    def run_call(cls, call, readline=False, timeout=None):
823        """Invoke a given devserver call using urllib.open or ssh.
824
825        Open the URL with HTTP or SSH-based HTTP, and return the text of the
826        response. Exceptions may be raised as for urllib2.urlopen() or
827        utils.run().
828
829        @param call: a url string that calls a method to a devserver.
830        @param readline: whether read http response line by line.
831        @param timeout: The timeout seconds for urlopen call or ssh call.
832
833        @return the results of this call.
834        """
835        if not ENABLE_SSH_CONNECTION_FOR_DEVSERVER:
836            return super(ImageServerBase, cls).run_call(
837                    call, readline=readline, timeout=timeout)
838        else:
839            return cls.run_ssh_call(
840                    call, readline=readline, timeout=timeout)
841
842
843    def _poll_is_staged(self, **kwargs):
844        """Polling devserver.is_staged until all artifacts are staged.
845
846        @param kwargs: keyword arguments to make is_staged devserver call.
847
848        @return: True if all artifacts are staged in devserver.
849        """
850        call = self.build_call('is_staged', **kwargs)
851
852        def all_staged():
853            """Call devserver.is_staged rpc to check if all files are staged.
854
855            @return: True if all artifacts are staged in devserver. False
856                     otherwise.
857            @rasies DevServerException, the exception is a wrapper of all
858                    exceptions that were raised when devserver tried to download
859                    the artifacts. devserver raises an HTTPError or a CmdError
860                    when an exception was raised in the code. Such exception
861                    should be re-raised here to stop the caller from waiting.
862                    If the call to devserver failed for connection issue, a
863                    URLError exception is raised, and caller should retry the
864                    call to avoid such network flakiness.
865
866            """
867            try:
868                result = self.run_call(call)
869                logging.debug('whether artifact is staged: %r', result)
870                return result == 'True'
871            except urllib2.HTTPError as e:
872                error_markup = e.read()
873                raise DevServerException(_strip_http_message(error_markup))
874            except urllib2.URLError as e:
875                # Could be connection issue, retry it.
876                # For example: <urlopen error [Errno 111] Connection refused>
877                logging.error('URLError happens in is_stage: %r', e)
878                return False
879            except error.CmdError as e:
880                # Retry if SSH failed to connect to the devserver.
881                logging.warning('CmdError happens in is_stage: %r, will retry', e)
882                return False
883
884        site_utils.poll_for_condition(
885                all_staged,
886                exception=site_utils.TimeoutError(),
887                timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60,
888                sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL)
889
890        return True
891
892
893    def _call_and_wait(self, call_name, error_message,
894                       expected_response=SUCCESS, **kwargs):
895        """Helper method to make a urlopen call, and wait for artifacts staged.
896
897        @param call_name: name of devserver rpc call.
898        @param error_message: Error message to be thrown if response does not
899                              match expected_response.
900        @param expected_response: Expected response from rpc, default to
901                                  |Success|. If it's set to None, do not compare
902                                  the actual response. Any response is consider
903                                  to be good.
904        @param kwargs: keyword arguments to make is_staged devserver call.
905
906        @return: The response from rpc.
907        @raise DevServerException upon any return code that's expected_response.
908
909        """
910        call = self.build_call(call_name, async=True, **kwargs)
911        try:
912            response = self.run_call(call)
913            logging.debug('response for RPC: %r', response)
914        except httplib.BadStatusLine as e:
915            logging.error(e)
916            raise DevServerException('Received Bad Status line, Devserver %s '
917                                     'might have gone down while handling '
918                                     'the call: %s' % (self.url(), call))
919
920        if expected_response and not response == expected_response:
921                raise DevServerException(error_message)
922
923        # `os_type` is needed in build a devserver call, but not needed for
924        # wait_for_artifacts_staged, since that method is implemented by
925        # each ImageServerBase child class.
926        if 'os_type' in kwargs:
927            del kwargs['os_type']
928        self.wait_for_artifacts_staged(**kwargs)
929        return response
930
931
932    def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs):
933        """Tell the devserver to download and stage |artifacts| from |image|
934        specified by kwargs.
935
936        This is the main call point for staging any specific artifacts for a
937        given build. To see the list of artifacts one can stage see:
938
939        ~src/platfrom/dev/artifact_info.py.
940
941        This is maintained along with the actual devserver code.
942
943        @param artifacts: A list of artifacts.
944        @param files: A list of files to stage.
945        @param archive_url: Optional parameter that has the archive_url to stage
946                this artifact from. Default is specified in autotest config +
947                image.
948        @param kwargs: keyword arguments that specify the build information, to
949                make stage devserver call.
950
951        @raise DevServerException upon any return code that's not HTTP OK.
952        """
953        if not archive_url:
954            archive_url = _get_storage_server_for_artifacts(artifacts) + build
955
956        artifacts_arg = ','.join(artifacts) if artifacts else ''
957        files_arg = ','.join(files) if files else ''
958        error_message = ("staging %s for %s failed;"
959                         "HTTP OK not accompanied by 'Success'." %
960                         ('artifacts=%s files=%s ' % (artifacts_arg, files_arg),
961                          build))
962
963        staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' %
964                        (build, artifacts, files, archive_url))
965        logging.info('Staging artifacts on devserver %s: %s',
966                     self.url(), staging_info)
967        if artifacts:
968            server_name = self.get_server_name(self.url())
969            timer_key = self.create_stats_str(
970                    'stage_artifacts', server_name, artifacts)
971            counter_key = self.create_stats_str(
972                    'stage_artifacts_count', server_name, artifacts)
973            metadata = self.create_metadata(server_name, build, artifacts,
974                                            files)
975            autotest_stats.Counter(counter_key, metadata=metadata).increment()
976            timer = autotest_stats.Timer(timer_key, metadata=metadata)
977            timer.start()
978        try:
979            arguments = {'archive_url': archive_url,
980                         'artifacts': artifacts_arg,
981                         'files': files_arg}
982            if kwargs:
983                arguments.update(kwargs)
984            self.call_and_wait(call_name='stage', error_message=error_message,
985                               **arguments)
986            if artifacts:
987                timer.stop()
988            logging.info('Finished staging artifacts: %s', staging_info)
989        except (site_utils.TimeoutError, error.TimeoutException):
990            logging.error('stage_artifacts timed out: %s', staging_info)
991            if artifacts:
992                timeout_key = self.create_stats_str(
993                        'stage_artifacts_timeout', server_name, artifacts)
994                autotest_stats.Counter(timeout_key,
995                                       metadata=metadata).increment()
996            raise DevServerException(
997                    'stage_artifacts timed out: %s' % staging_info)
998
999
1000    def call_and_wait(self, *args, **kwargs):
1001        """Helper method to make a urlopen call, and wait for artifacts staged.
1002
1003        This method needs to be overridden in the subclass to implement the
1004        logic to call _call_and_wait.
1005        """
1006        raise NotImplementedError
1007
1008
1009    def _trigger_download(self, build, artifacts, files, synchronous=True,
1010                          **kwargs_build_info):
1011        """Tell the devserver to download and stage image specified in
1012        kwargs_build_info.
1013
1014        Tells the devserver to fetch |image| from the image storage server
1015        named by _get_image_storage_server().
1016
1017        If |synchronous| is True, waits for the entire download to finish
1018        staging before returning. Otherwise only the artifacts necessary
1019        to start installing images onto DUT's will be staged before returning.
1020        A caller can then call finish_download to guarantee the rest of the
1021        artifacts have finished staging.
1022
1023        @param synchronous: if True, waits until all components of the image are
1024               staged before returning.
1025        @param kwargs_build_info: Dictionary of build information.
1026                For CrOS, it is None as build is the CrOS image name.
1027                For Android, it is {'target': target,
1028                                    'build_id': build_id,
1029                                    'branch': branch}
1030
1031        @raise DevServerException upon any return code that's not HTTP OK.
1032
1033        """
1034        if kwargs_build_info:
1035            archive_url = None
1036        else:
1037            archive_url = _get_image_storage_server() + build
1038        error_message = ("trigger_download for %s failed;"
1039                         "HTTP OK not accompanied by 'Success'." % build)
1040        kwargs = {'archive_url': archive_url,
1041                  'artifacts': artifacts,
1042                  'files': files,
1043                  'error_message': error_message}
1044        if kwargs_build_info:
1045            kwargs.update(kwargs_build_info)
1046
1047        logging.info('trigger_download starts for %s', build)
1048        server_name = self.get_server_name(self.url())
1049        artifacts_list = artifacts.split(',')
1050        counter_key = self.create_stats_str(
1051                'trigger_download_count', server_name, artifacts_list)
1052        metadata = self.create_metadata(server_name, build, artifacts_list)
1053        autotest_stats.Counter(counter_key, metadata=metadata).increment()
1054        try:
1055            response = self.call_and_wait(call_name='stage', **kwargs)
1056            logging.info('trigger_download finishes for %s', build)
1057        except (site_utils.TimeoutError, error.TimeoutException):
1058            logging.error('trigger_download timed out for %s.', build)
1059            timeout_key = self.create_stats_str(
1060                    'trigger_download_timeout', server_name, artifacts_list)
1061            autotest_stats.Counter(timeout_key, metadata=metadata).increment()
1062            raise DevServerException(
1063                    'trigger_download timed out for %s.' % build)
1064        was_successful = response == SUCCESS
1065        if was_successful and synchronous:
1066            self._finish_download(build, artifacts, files, **kwargs_build_info)
1067
1068
1069    def _finish_download(self, build, artifacts, files, **kwargs_build_info):
1070        """Tell the devserver to finish staging image specified in
1071        kwargs_build_info.
1072
1073        If trigger_download is called with synchronous=False, it will return
1074        before all artifacts have been staged. This method contacts the
1075        devserver and blocks until all staging is completed and should be
1076        called after a call to trigger_download.
1077
1078        @param kwargs_build_info: Dictionary of build information.
1079                For CrOS, it is None as build is the CrOS image name.
1080                For Android, it is {'target': target,
1081                                    'build_id': build_id,
1082                                    'branch': branch}
1083
1084        @raise DevServerException upon any return code that's not HTTP OK.
1085        """
1086        archive_url = _get_image_storage_server() + build
1087        error_message = ("finish_download for %s failed;"
1088                         "HTTP OK not accompanied by 'Success'." % build)
1089        kwargs = {'archive_url': archive_url,
1090                  'artifacts': artifacts,
1091                  'files': files,
1092                  'error_message': error_message}
1093        if kwargs_build_info:
1094            kwargs.update(kwargs_build_info)
1095        try:
1096            self.call_and_wait(call_name='stage', **kwargs)
1097        except (site_utils.TimeoutError, error.TimeoutException):
1098            logging.error('finish_download timed out for %s', build)
1099            server_name = self.get_server_name(self.url())
1100            artifacts_list = artifacts.split(',')
1101            timeout_key = self.create_stats_str(
1102                    'finish_download_timeout', server_name, artifacts_list)
1103            metadata = self.create_metadata(server_name, build, artifacts_list)
1104            autotest_stats.Counter(timeout_key, metadata=metadata).increment()
1105            raise DevServerException(
1106                    'finish_download timed out for %s.' % build)
1107
1108
1109    @remote_devserver_call()
1110    def locate_file(self, file_name, artifacts, build, build_info):
1111        """Locate a file with the given file_name on devserver.
1112
1113        This method calls devserver RPC `locate_file` to look up a file with
1114        the given file name inside specified build artifacts.
1115
1116        @param file_name: Name of the file to look for a file.
1117        @param artifacts: A list of artifact names to search for the file.
1118        @param build: Name of the build. For Android, it's None as build_info
1119                should be used.
1120        @param build_info: Dictionary of build information.
1121                For CrOS, it is None as build is the CrOS image name.
1122                For Android, it is {'target': target,
1123                                    'build_id': build_id,
1124                                    'branch': branch}
1125
1126        @return: A devserver url to the file.
1127        @raise DevServerException upon any return code that's not HTTP OK.
1128        """
1129        if not build and not build_info:
1130            raise DevServerException('You must specify build information to '
1131                                     'look for file %s in artifacts %s.' %
1132                                     (file_name, artifacts))
1133        kwargs = {'file_name': file_name,
1134                  'artifacts': artifacts}
1135        if build_info:
1136            build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info
1137            kwargs.update(build_info)
1138            # Devserver treats Android and Brillo build in the same way as they
1139            # are both retrieved from Launch Control and have similar build
1140            # artifacts. Therefore, os_type for devserver calls is `android` for
1141            # both Android and Brillo builds.
1142            kwargs['os_type'] = 'android'
1143        else:
1144            build_path = build
1145            kwargs['build'] = build
1146        call = self.build_call('locate_file', async=False, **kwargs)
1147        try:
1148            file_path = self.run_call(call)
1149            return os.path.join(self.url(), 'static', build_path, file_path)
1150        except httplib.BadStatusLine as e:
1151            logging.error(e)
1152            raise DevServerException('Received Bad Status line, Devserver %s '
1153                                     'might have gone down while handling '
1154                                     'the call: %s' % (self.url(), call))
1155
1156
1157    @remote_devserver_call()
1158    def list_control_files(self, build, suite_name=''):
1159        """Ask the devserver to list all control files for |build|.
1160
1161        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1162                      whose control files the caller wants listed.
1163        @param suite_name: The name of the suite for which we require control
1164                           files.
1165        @return None on failure, or a list of control file paths
1166                (e.g. server/site_tests/autoupdate/control)
1167        @raise DevServerException upon any return code that's not HTTP OK.
1168        """
1169        build = self.translate(build)
1170        call = self.build_call('controlfiles', build=build,
1171                               suite_name=suite_name)
1172        return self.run_call(call, readline=True)
1173
1174
1175    @remote_devserver_call()
1176    def get_control_file(self, build, control_path):
1177        """Ask the devserver for the contents of a control file.
1178
1179        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1180                      whose control file the caller wants to fetch.
1181        @param control_path: The file to fetch
1182                             (e.g. server/site_tests/autoupdate/control)
1183        @return The contents of the desired file.
1184        @raise DevServerException upon any return code that's not HTTP OK.
1185        """
1186        build = self.translate(build)
1187        call = self.build_call('controlfiles', build=build,
1188                               control_path=control_path)
1189        return self.run_call(call)
1190
1191
1192    @remote_devserver_call()
1193    def list_suite_controls(self, build, suite_name=''):
1194        """Ask the devserver to list contents of all control files for |build|.
1195
1196        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1197                      whose control files' contents the caller wants returned.
1198        @param suite_name: The name of the suite for which we require control
1199                           files.
1200        @return None on failure, or a dict of contents of all control files
1201            (e.g. {'path1': "#Copyright controls ***", ...,
1202                pathX': "#Copyright controls ***"}
1203        @raise DevServerException upon any return code that's not HTTP OK.
1204        """
1205        build = self.translate(build)
1206        call = self.build_call('list_suite_controls', build=build,
1207                               suite_name=suite_name)
1208        return json.load(cStringIO.StringIO(self.run_call(call)))
1209
1210
1211class ImageServer(ImageServerBase):
1212    """Class for DevServer that handles RPCs related to CrOS images.
1213
1214    The calls to devserver to stage artifacts, including stage and download, are
1215    made in async mode. That is, when caller makes an RPC |stage| to request
1216    devserver to stage certain artifacts, devserver handles the call and starts
1217    staging artifacts in a new thread, and return |Success| without waiting for
1218    staging being completed. When caller receives message |Success|, it polls
1219    devserver's is_staged call until all artifacts are staged.
    This mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and cherrypy
    starts with a fixed number of threads that handle devserver rpc.
1223    """
1224
1225    class ArtifactUrls(object):
1226        """A container for URLs of staged artifacts.
1227
1228        Attributes:
1229            full_payload: URL for downloading a staged full release update
1230            mton_payload: URL for downloading a staged M-to-N release update
1231            nton_payload: URL for downloading a staged N-to-N release update
1232
1233        """
1234        def __init__(self, full_payload=None, mton_payload=None,
1235                     nton_payload=None):
1236            self.full_payload = full_payload
1237            self.mton_payload = mton_payload
1238            self.nton_payload = nton_payload
1239
1240
1241    def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''):
1242        """Polling devserver.is_staged until all artifacts are staged.
1243
1244        @param archive_url: Google Storage URL for the build.
1245        @param artifacts: Comma separated list of artifacts to download.
1246        @param files: Comma separated list of files to download.
1247        @return: True if all artifacts are staged in devserver.
1248        """
1249        kwargs = {'archive_url': archive_url,
1250                  'artifacts': artifacts,
1251                  'files': files}
1252        return self._poll_is_staged(**kwargs)
1253
1254
1255    @remote_devserver_call()
1256    def call_and_wait(self, call_name, archive_url, artifacts, files,
1257                      error_message, expected_response=SUCCESS):
1258        """Helper method to make a urlopen call, and wait for artifacts staged.
1259
1260        @param call_name: name of devserver rpc call.
1261        @param archive_url: Google Storage URL for the build..
1262        @param artifacts: Comma separated list of artifacts to download.
1263        @param files: Comma separated list of files to download.
1264        @param expected_response: Expected response from rpc, default to
1265                                  |Success|. If it's set to None, do not compare
1266                                  the actual response. Any response is consider
1267                                  to be good.
1268        @param error_message: Error message to be thrown if response does not
1269                              match expected_response.
1270
1271        @return: The response from rpc.
1272        @raise DevServerException upon any return code that's expected_response.
1273
1274        """
1275        kwargs = {'archive_url': archive_url,
1276                  'artifacts': artifacts,
1277                  'files': files}
1278        return self._call_and_wait(call_name, error_message,
1279                                   expected_response, **kwargs)
1280
1281
1282    @remote_devserver_call()
1283    def stage_artifacts(self, image=None, artifacts=None, files='',
1284                        archive_url=None):
1285        """Tell the devserver to download and stage |artifacts| from |image|.
1286
1287         This is the main call point for staging any specific artifacts for a
1288        given build. To see the list of artifacts one can stage see:
1289
1290        ~src/platfrom/dev/artifact_info.py.
1291
1292        This is maintained along with the actual devserver code.
1293
1294        @param image: the image to fetch and stage.
1295        @param artifacts: A list of artifacts.
1296        @param files: A list of files to stage.
1297        @param archive_url: Optional parameter that has the archive_url to stage
1298                this artifact from. Default is specified in autotest config +
1299                image.
1300
1301        @raise DevServerException upon any return code that's not HTTP OK.
1302        """
1303        if not artifacts and not files:
1304            raise DevServerException('Must specify something to stage.')
1305        image = self.translate(image)
1306        self._stage_artifacts(image, artifacts, files, archive_url)
1307
1308
1309    @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS)
1310    def list_image_dir(self, image):
1311        """List the contents of the image stage directory, on the devserver.
1312
1313        @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>.
1314
1315        @raise DevServerException upon any return code that's not HTTP OK.
1316        """
1317        image = self.translate(image)
1318        logging.info('Requesting contents from devserver %s for image %s',
1319                     self.url(), image)
1320        archive_url = _get_storage_server_for_artifacts() + image
1321        call = self.build_call('list_image_dir', archive_url=archive_url)
1322        response = self.run_call(call, readline=True)
1323        for line in response:
1324            logging.info(line)
1325
1326
1327    def trigger_download(self, image, synchronous=True):
1328        """Tell the devserver to download and stage |image|.
1329
1330        Tells the devserver to fetch |image| from the image storage server
1331        named by _get_image_storage_server().
1332
1333        If |synchronous| is True, waits for the entire download to finish
1334        staging before returning. Otherwise only the artifacts necessary
1335        to start installing images onto DUT's will be staged before returning.
1336        A caller can then call finish_download to guarantee the rest of the
1337        artifacts have finished staging.
1338
1339        @param image: the image to fetch and stage.
1340        @param synchronous: if True, waits until all components of the image are
1341               staged before returning.
1342
1343        @raise DevServerException upon any return code that's not HTTP OK.
1344
1345        """
1346        image = self.translate(image)
1347        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE
1348        self._trigger_download(image, artifacts, files='',
1349                               synchronous=synchronous)
1350
1351
1352    @remote_devserver_call()
1353    def setup_telemetry(self, build):
1354        """Tell the devserver to setup telemetry for this build.
1355
1356        The devserver will stage autotest and then extract the required files
1357        for telemetry.
1358
1359        @param build: the build to setup telemetry for.
1360
1361        @returns path on the devserver that telemetry is installed to.
1362        """
1363        build = self.translate(build)
1364        archive_url = _get_image_storage_server() + build
1365        call = self.build_call('setup_telemetry', archive_url=archive_url)
1366        try:
1367            response = self.run_call(call)
1368        except httplib.BadStatusLine as e:
1369            logging.error(e)
1370            raise DevServerException('Received Bad Status line, Devserver %s '
1371                                     'might have gone down while handling '
1372                                     'the call: %s' % (self.url(), call))
1373        return response
1374
1375
1376    def finish_download(self, image):
1377        """Tell the devserver to finish staging |image|.
1378
1379        If trigger_download is called with synchronous=False, it will return
1380        before all artifacts have been staged. This method contacts the
1381        devserver and blocks until all staging is completed and should be
1382        called after a call to trigger_download.
1383
1384        @param image: the image to fetch and stage.
1385        @raise DevServerException upon any return code that's not HTTP OK.
1386        """
1387        image = self.translate(image)
1388        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST
1389        self._finish_download(image, artifacts, files='')
1390
1391
1392    def get_update_url(self, image):
1393        """Returns the url that should be passed to the updater.
1394
1395        @param image: the image that was fetched.
1396        """
1397        image = self.translate(image)
1398        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
1399                                              type=str)
1400        return (url_pattern % (self.url(), image))
1401
1402
1403    def get_staged_file_url(self, filename, image):
1404        """Returns the url of a staged file for this image on the devserver."""
1405        return '/'.join([self._get_image_url(image), filename])
1406
1407
1408    def get_full_payload_url(self, image):
1409        """Returns a URL to a staged full payload.
1410
1411        @param image: the image that was fetched.
1412
1413        @return A fully qualified URL that can be used for downloading the
1414                payload.
1415
1416        """
1417        return self._get_image_url(image) + '/update.gz'
1418
1419
1420    def get_test_image_url(self, image):
1421        """Returns a URL to a staged test image.
1422
1423        @param image: the image that was fetched.
1424
1425        @return A fully qualified URL that can be used for downloading the
1426                image.
1427
1428        """
1429        return self._get_image_url(image) + '/chromiumos_test_image.bin'
1430
1431
1432    @remote_devserver_call()
1433    def get_dependencies_file(self, build):
1434        """Ask the dev server for the contents of the suite dependencies file.
1435
1436        Ask the dev server at |self._dev_server| for the contents of the
1437        pre-processed suite dependencies file (at DEPENDENCIES_FILE)
1438        for |build|.
1439
1440        @param build: The build (e.g. x86-mario-release/R21-2333.0.0)
1441                      whose dependencies the caller is interested in.
1442        @return The contents of the dependencies file, which should eval to
1443                a dict of dicts, as per site_utils/suite_preprocessor.py.
1444        @raise DevServerException upon any return code that's not HTTP OK.
1445        """
1446        build = self.translate(build)
1447        call = self.build_call('controlfiles',
1448                               build=build, control_path=DEPENDENCIES_FILE)
1449        return self.run_call(call)
1450
1451
1452    @remote_devserver_call()
1453    def get_latest_build_in_gs(self, board):
1454        """Ask the devservers for the latest offical build in Google Storage.
1455
1456        @param board: The board for who we want the latest official build.
1457        @return A string of the returned build rambi-release/R37-5868.0.0
1458        @raise DevServerException upon any return code that's not HTTP OK.
1459        """
1460        call = self.build_call(
1461                'xbuddy_translate/remote/%s/latest-official' % board,
1462                image_dir=_get_image_storage_server())
1463        image_name = self.run_call(call)
1464        return os.path.dirname(image_name)
1465
1466
1467    def translate(self, build_name):
1468        """Translate the build name if it's in LATEST format.
1469
1470        If the build name is in the format [builder]/LATEST, return the latest
1471        build in Google Storage otherwise return the build name as is.
1472
1473        @param build_name: build_name to check.
1474
1475        @return The actual build name to use.
1476        """
1477        match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
1478        if not match:
1479            return build_name
1480        translated_build = self.get_latest_build_in_gs(match.groups()[0])
1481        logging.debug('Translated relative build %s to %s', build_name,
1482                      translated_build)
1483        return translated_build
1484
1485
1486    @classmethod
1487    @remote_devserver_call()
1488    def get_latest_build(cls, target, milestone=''):
1489        """Ask all the devservers for the latest build for a given target.
1490
1491        @param target: The build target, typically a combination of the board
1492                       and the type of build e.g. x86-mario-release.
1493        @param milestone:  For latest build set to '', for builds only in a
1494                           specific milestone set to a str of format Rxx
1495                           (e.g. R16). Default: ''. Since we are dealing with a
1496                           webserver sending an empty string, '', ensures that
1497                           the variable in the URL is ignored as if it was set
1498                           to None.
1499        @return A string of the returned build e.g. R20-2226.0.0.
1500        @raise DevServerException upon any return code that's not HTTP OK.
1501        """
1502        calls = cls.build_all_calls('latestbuild', target=target,
1503                                    milestone=milestone)
1504        latest_builds = []
1505        for call in calls:
1506            latest_builds.append(cls.run_call(call))
1507
1508        return max(latest_builds, key=version.LooseVersion)
1509
1510
1511    @remote_devserver_call()
1512    def _kill_au_process_for_host(self, **kwargs):
1513        """Kill the triggerred auto_update process if error happens in cros_au.
1514
1515        @param kwargs: Arguments to make kill_au_proc devserver call.
1516        """
1517        call = self.build_call('kill_au_proc', **kwargs)
1518        response = self.run_call(call)
1519        if not response == 'True':
1520            raise DevServerException(
1521                    'Failed to kill the triggerred CrOS auto_update process'
1522                    'on devserver %s, the response is %s' % (
1523                            self.url(), response))
1524
1525
1526    def kill_au_process_for_host(self, host_name):
1527        """Kill the triggerred auto_update process if error happens.
1528
1529        @param host_name: The DUT's hostname.
1530
1531        @return: True if successfully kill the auto-update process for host.
1532        """
1533        kwargs = {'host_name': host_name}
1534        try:
1535            self._kill_au_process_for_host(**kwargs)
1536        except DevServerException:
1537            return False
1538
1539        return True
1540
1541
1542    @remote_devserver_call()
1543    def _clean_track_log(self, **kwargs):
1544        """Clean track log for the current auto-update process."""
1545        call = self.build_call('handler_cleanup', **kwargs)
1546        self.run_call(call)
1547
1548
1549    def clean_track_log(self, host_name, pid):
1550        """Clean track log for the current auto-update process.
1551
1552        @param host_name: The host name to be updated.
1553        @param pid: The auto-update process id.
1554
1555        @return: True if track log is successfully cleaned, False otherwise.
1556        """
1557        if not pid:
1558            return False
1559
1560        kwargs = {'host_name': host_name, 'pid': pid}
1561        try:
1562            self._clean_track_log(**kwargs)
1563        except DevServerException as e:
1564            logging.debug('Failed to clean track_status_file on '
1565                          'devserver for host %s and process id %s: %s',
1566                          host_name, pid, str(e))
1567            return False
1568
1569        return True
1570
1571    @remote_devserver_call()
1572    def _collect_au_log(self, log_dir, **kwargs):
1573        """Collect logs from devserver after cros-update process is finished.
1574
1575        Collect the logs that recording the whole cros-update process, and
1576        write it to sysinfo path of a job.
1577
1578        The example log file name that is stored is like:
1579            '1220-repair/sysinfo/CrOS_update_host_name_pid.log'
1580
1581        @param host_name: the DUT's hostname.
1582        @param pid: the auto-update process id on devserver.
1583        @param log_dir: The directory to save the cros-update process log
1584                        retrieved from devserver.
1585        """
1586        call = self.build_call('collect_cros_au_log', **kwargs)
1587        response = self.run_call(call)
1588        if not os.path.exists(log_dir):
1589            os.mkdir(log_dir)
1590        write_file = os.path.join(
1591                log_dir, 'CrOS_update_%s_%s.log' % (
1592                        kwargs['host_name'], kwargs['pid']))
1593        logging.debug('Saving auto-update logs into %s', write_file)
1594        try:
1595            with open(write_file, 'w') as out_log:
1596                out_log.write(response)
1597        except:
1598            raise DevServerException('Failed to write auto-update logs into '
1599                                     '%s' % write_file)
1600
1601
1602    def collect_au_log(self, host_name, pid, log_dir):
1603        """Collect logs from devserver after cros-update process is finished.
1604
1605        @param host_name: the DUT's hostname.
1606        @param pid: the auto-update process id on devserver.
1607        @param log_dir: The directory to save the cros-update process log
1608                        retrieved from devserver.
1609
1610        @return: True if auto-update log is successfully collected, False
1611          otherwise.
1612        """
1613        if not pid:
1614            return False
1615
1616        kwargs = {'host_name': host_name, 'pid': pid}
1617        try:
1618            self._collect_au_log(log_dir, **kwargs)
1619        except DevServerException as e:
1620            logging.debug('Failed to collect auto-update log on '
1621                          'devserver for host %s and process id %s: %s',
1622                          host_name, pid, str(e))
1623            return False
1624
1625        return True
1626
1627
1628    @remote_devserver_call()
1629    def _trigger_auto_update(self, **kwargs):
1630        """Trigger auto-update by calling devserver.cros_au.
1631
1632        @param kwargs:  Arguments to make cros_au devserver call.
1633
1634        @return: a tuple indicates whether the RPC call cros_au succeeds and
1635          the auto-update process id running on devserver.
1636        """
1637        host_name = kwargs['host_name']
1638        call = self.build_call('cros_au', async=True, **kwargs)
1639        try:
1640            response = self.run_call(call)
1641            logging.info(
1642                'Received response from devserver for cros_au call: %r',
1643                response)
1644        except httplib.BadStatusLine as e:
1645            logging.error(e)
1646            raise DevServerException('Received Bad Status line, Devserver %s '
1647                                     'might have gone down while handling '
1648                                     'the call: %s' % (self.url(), call))
1649
1650        return response
1651
1652
1653    def _wait_for_auto_update_finished(self, pid, **kwargs):
1654        """Polling devserver.get_au_status to get current auto-update status.
1655
1656        The current auto-update status is used to identify whether the update
1657        process is finished.
1658
1659        @param pid:    The background process id for auto-update in devserver.
1660        @param kwargs: keyword arguments to make get_au_status devserver call.
1661
1662        @return: True if auto-update is finished for a given dut.
1663        """
1664        logging.debug('Check the progress for auto-update process %r', pid)
1665        kwargs['pid'] = pid
1666        call = self.build_call('get_au_status', **kwargs)
1667
1668        def all_finished():
1669            """Call devserver.get_au_status rpc to check if auto-update
1670               is finished.
1671
1672            @return: True if auto-update is finished for a given dut. False
1673                     otherwise.
1674            @rasies  DevServerException, the exception is a wrapper of all
1675                     exceptions that were raised when devserver tried to
1676                     download the artifacts. devserver raises an HTTPError or
1677                     a CmdError when an exception was raised in the code. Such
1678                     exception should be re-raised here to stop the caller from
1679                     waiting. If the call to devserver failed for connection
1680                     issue, a URLError exception is raised, and caller should
1681                     retry the call to avoid such network flakiness.
1682
1683            """
1684            try:
1685                response = json.loads(self.run_call(call))
1686                # This is a temp fix to fit both dict and tuple returning
1687                # values. The dict check will be removed after a corresponding
1688                # devserver CL is deployed.
1689                if isinstance(response, dict):
1690                    if response.get('detailed_error_msg'):
1691                        raise DevServerException(
1692                                response.get('detailed_error_msg'))
1693
1694                    if response.get('finished'):
1695                        logging.debug('CrOS auto-update is finished')
1696                        return True
1697                    else:
1698                        logging.debug('Current CrOS auto-update status: %s',
1699                                      response.get('status'))
1700                        return False
1701
1702                if not response[0]:
1703                    logging.debug('Current CrOS auto-update status: %s',
1704                                  response[1])
1705                    return False
1706                else:
1707                    logging.debug('CrOS auto-update is finished')
1708                    return True
1709            except urllib2.HTTPError as e:
1710                error_markup = e.read()
1711                raise DevServerException(_strip_http_message(error_markup))
1712            except urllib2.URLError as e:
1713                # Could be connection issue, retry it.
1714                # For example: <urlopen error [Errno 111] Connection refused>
1715                logging.warning('URLError (%r): Retrying connection to '
1716                                'devserver to check auto-update status.', e)
1717                return False
1718            except error.CmdError:
1719                # Retry if SSH failed to connect to the devserver.
1720                logging.warning('CmdError: Retrying SSH connection to check '
1721                                'auto-update status.')
1722                return False
1723            except socket.error as e:
1724                # Could be some temporary devserver connection issues.
1725                logging.warning('Socket Error (%r): Retrying connection to '
1726                                'devserver to check auto-update status.', e)
1727                return False
1728
1729        site_utils.poll_for_condition(
1730                all_finished,
1731                exception=site_utils.TimeoutError(),
1732                timeout=DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN * 60,
1733                sleep_interval=CROS_AU_POLLING_INTERVAL)
1734
1735        return True
1736
1737
1738    def wait_for_auto_update_finished(self, response, **kwargs):
1739        """Processing response of 'cros_au' and polling for auto-update status.
1740
1741        Will wait for the whole auto-update process is finished.
1742
1743        @param response: The response from RPC 'cros_au'
1744        @param kwargs: keyword arguments to make get_au_status devserver call.
1745
1746        @return: a tuple includes two elements.
1747          raised_error: None if everything works well or the raised error.
1748          pid: the auto-update process id on devserver.
1749        """
1750
1751        pid = 0
1752        raised_error = None
1753        try:
1754            response = json.loads(response)
1755            if response[0]:
1756                pid = response[1]
1757                logging.debug('start process %r for auto_update in devserver',
1758                              pid)
1759                self._wait_for_auto_update_finished(pid, **kwargs)
1760        except Exception as e:
1761            logging.debug('Failed to trigger auto-update process on devserver')
1762            raised_error = e
1763        finally:
1764            return raised_error, pid
1765
1766
1767    def _parse_AU_error(self, response):
1768        """Parse auto_update error returned from devserver."""
1769        return re.split('\n', response)[-1]
1770
1771
1772    def auto_update(self, host_name, build_name, log_dir=None,
1773                    force_update=False, full_update=False):
1774        """Auto-update a CrOS host.
1775
1776        @param host_name:    The hostname of the DUT to auto-update.
1777        @param build_name:   The build name to be auto-updated on the DUT.
1778        @param log_dir:      The log directory to store auto-update logs from
1779                             devserver.
1780        @param force_update: Force an update even if the version installed
1781                             is the same. Default: False.
1782        @param full_update:  If True, do not run stateful update, directly
1783                             force a full reimage. If False, try stateful
1784                             update first if the dut is already installed
1785                             with the same version.
1786        """
1787        kwargs = {'host_name': host_name,
1788                  'build_name': build_name,
1789                  'force_update': force_update,
1790                  'full_update': full_update}
1791
1792        error_msg = 'CrOS auto-update failed for host %s: %s'
1793        error_msg_attempt = 'Exception raised on auto_update attempt #%s:\n%s'
1794        is_au_success = False
1795        au_log_dir = os.path.join(log_dir,
1796                                  AUTO_UPDATE_LOG_DIR) if log_dir else None
1797        error_list = []
1798        for au_attempt in range(AU_RETRY_LIMIT):
1799            logging.debug('Start CrOS auto-update for host %s at %d time(s).',
1800                          host_name, au_attempt + 1)
1801            # No matter _start_auto_update succeeds or fails, the auto-update
1802            # track_status_file should be cleaned, and the auto-update execute
1803            # log should be collected to directory sysinfo. Also, the error
1804            # raised by _start_auto_update should be displayed.
1805            try:
1806                response = self._trigger_auto_update(**kwargs)
1807            except DevServerException as e:
1808                logging.debug(error_msg_attempt, au_attempt+1, str(e))
1809                error_list.append(str(e))
1810            else:
1811                raised_error, pid = self.wait_for_auto_update_finished(response,
1812                                                                       **kwargs)
1813                # Error happens in _clean_track_log won't be raised. Auto-update
1814                # process will be retried.
1815                is_clean_success = self.clean_track_log(host_name, pid)
1816                # Error happens in _collect_au_log won't be raised. Auto-update
1817                # process will be retried.
1818                if au_log_dir:
1819                    is_collect_success = self.collect_au_log(
1820                            host_name, pid, au_log_dir)
1821                else:
1822                    is_collect_success = True
1823                # If any error is raised previously, log it and retry
1824                # auto-update. Otherwise, claim a success CrOS auto-update.
1825                if not raised_error and is_clean_success and is_collect_success:
1826                    logging.debug('CrOS auto-update succeed for host %s',
1827                                  host_name)
1828                    is_au_success = True
1829                    break
1830                else:
1831                    if raised_error:
1832                        logging.debug(error_msg_attempt, au_attempt+1,
1833                                      str(raised_error))
1834                        error_list.append(self._parse_AU_error(str(raised_error)))
1835                    if not self.kill_au_process_for_host(host_name):
1836                        logging.debug('Failed to kill auto_update process %d',
1837                                      pid)
1838
1839            finally:
1840                if not is_au_success and au_attempt < AU_RETRY_LIMIT - 1:
1841                    time.sleep(CROS_AU_RETRY_INTERVAL)
1842                    # TODO(kevcheng): Remove this once crbug.com/651974 is
1843                    # fixed.
1844                    # DNS is broken in the cassandra lab, so use the IP of the
1845                    # hostname instead if it fails. Not rename host_name here
1846                    # for error msg reporting.
1847                    host_name_ip = socket.gethostbyname(host_name)
1848                    kwargs['host_name'] = host_name_ip
1849                    logging.debug(
1850                            'AU failed, trying IP instead of hostname: %s',
1851                            host_name_ip)
1852
1853        if not is_au_success:
1854            # If errors happen in the CrOS AU process, report the first error
1855            # since the following errors might be caused by the first error.
1856            # If error happens in RPCs of cleaning track log, collecting
1857            # auto-update logs, or killing auto-update processes, just report
1858            # them together.
1859            if error_list:
1860                raise DevServerException(error_msg % (host_name, error_list[0]))
1861            else:
1862                raise DevServerException(error_msg % (
1863                        host_name, ('RPC calls after the whole auto-update '
1864                                    'process failed.')))
1865
1866
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when caller makes an RPC |stage| to request
    devserver to stage certain artifacts, devserver handles the call and starts
    staging artifacts in a new thread, and return |Success| without waiting for
    staging being completed. When caller receives message |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    Such mechanism is designed to prevent cherrypy threads in devserver being
    running out, as staging artifacts might take long time, and cherrypy starts
    with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Polling devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only sent to
                            the devserver when it is not empty.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the CrOS build. Only sent
                            to the devserver when it is not empty.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is consider
                                  to be good.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.

        @return: The response from rpc.
        @raise DevServerException upon any return code that's not
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        If only |image| is given, target, build id and branch are parsed out
        of it.

        @param target: Target of the android build to stage, e.g.,
                               shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException upon any return code that's not HTTP OK,
               when target, build id or branch is missing, or when neither
               artifacts nor files are given.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')

        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')
        # All three build-info values are known to be set at this point, so no
        # further validation of android_build_info is needed.
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of file separated by commas.
        @param os: OS artifacts to download (android/brillo).
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first dash.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo).

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        board = target.split('-')[0]
        # Pass |os| along, as trigger_download does, so that the artifacts
        # waited for here match the ones requested for that OS.
        artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch,
                              'os_type': 'android'}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST, return the
        latest build in Launch Control otherwise return the build name as is.
        The LATEST marker is matched case-insensitively.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        if build_id.upper() != 'LATEST':
            return build_name
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
2100
def _is_load_healthy(load):
    """Tell whether a devserver's load stays within the allowed thresholds.

    The CPU load is checked first, then the network IO; the first value that
    exceeds its DevServer maximum is logged at debug level.

    @param load: The devserver's load stats to check.

    @return: True if neither the CPU load nor the network IO exceeds its
             threshold. Return False otherwise.

    """
    cpu_load = load[DevServer.CPU_LOAD]
    if cpu_load > DevServer.MAX_CPU_LOAD:
        logging.debug('CPU load of devserver %s is at %s%%, which is higher '
                      'than the threshold of %s%%', load['devserver'],
                      cpu_load, DevServer.MAX_CPU_LOAD)
        return False

    network_io = load[DevServer.NETWORK_IO]
    if network_io > DevServer.MAX_NETWORK_IO:
        logging.debug('Network IO of devserver %s is at %i Bps, which is '
                      'higher than the threshold of %i bytes per second.',
                      load['devserver'], network_io,
                      DevServer.MAX_NETWORK_IO)
        return False

    return True
2123
2124
def _compare_load(devserver1, devserver2):
    """cmp-style comparator ordering two devservers by their disk IO.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: The difference between the DevServer.DISK_IO stat of
             `devserver1` and that of `devserver2`, converted with int().
             Negative when `devserver1` has the smaller value, positive when
             it has the larger one, and 0 when the converted difference is
             zero.

    """
    first_disk_io = devserver1[DevServer.DISK_IO]
    second_disk_io = devserver2[DevServer.DISK_IO]
    difference = first_disk_io - second_disk_io
    return int(difference)
2136
2137
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Get the devserver with the least load.

    Query the load of every candidate devserver in parallel, discard the ones
    whose stats are missing or fail the free disk, apache client count, CPU or
    network IO checks, and pick the remaining one that sorts first according
    to `_compare_load`.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load, or None if no
             devserver can be selected (no healthy devserver and retry is
             not allowed, no load stats could be retrieved, or every
             devserver is over the load thresholds).

    """
    devservers, can_retry = devserver_type.get_available_devservers(
            hostname)
    # If no healthy devservers available and can_retry is False, return None.
    # Otherwise, relax the constraint on hostname, allow all devservers to be
    # available.
    if not devserver_type.get_healthy_devserver('', devservers):
        if not can_retry:
            return None
        devservers, _ = devserver_type.get_available_devservers()

    # get_devserver_load call needs to be made in a new process to allow force
    # timeout using signal.
    output = multiprocessing.Queue()
    processes = [
            multiprocessing.Process(
                    target=devserver_type.get_devserver_load_wrapper,
                    args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output))
            for devserver in devservers]

    for p in processes:
        p.start()
    # Drain the queue BEFORE joining the workers. A process that has put data
    # on a multiprocessing.Queue may not terminate until that data has been
    # consumed, so joining first can deadlock. One result is read per worker;
    # this relies on each worker putting exactly one item on the queue.
    loads = [output.get() for _ in processes]
    for p in processes:
        p.join()

    # Filter out any load failed to be retrieved or does not support load check.
    loads = [load for load in loads if load and DevServer.CPU_LOAD in load and
             DevServer.is_free_disk_ok(load) and
             DevServer.is_apache_client_count_ok(load)]
    if not loads:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None
    # Drop devservers whose CPU load or network IO is above the threshold.
    loads = [load for load in loads if _is_load_healthy(load)]
    if not loads:
        logging.error('No devserver has the capacity to be selected.')
        return None
    loads = sorted(loads, cmp=_compare_load)
    return loads[0]['devserver']
2198
2199
def resolve(build, hostname=None):
    """Resolve a devserver that can be used for the given build and hostname.

    Launch Control builds are resolved through AndroidBuildServer, every
    other build through ImageServer.

    @param build: Name of a build to stage on devserver, e.g.,
                  ChromeOS build: daisy-release/R50-1234.0.0
                  Launch Control build: git_mnc_release/shamu-eng
    @param hostname: Hostname of the host the devserver is resolved for.
            Default is None, which means devserver is not restricted by the
            network location of the host.

    @return: A DevServer instance that can be used to stage given build for the
             given host.
    """
    if utils.is_launch_control_build(build):
        server_class = AndroidBuildServer
    else:
        server_class = ImageServer
    return server_class.resolve(build, hostname)
2216