1# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import contextlib
6import datetime
7import logging
8import pprint
9import time
10
11import common
12from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib.cros.network import ap_constants
14from autotest_lib.client.common_lib.cros.network import iw_runner
15from autotest_lib.server import hosts
16from autotest_lib.server import site_linux_system
17from autotest_lib.server.cros import host_lock_manager
18from autotest_lib.server.cros.ap_configurators import ap_batch_locker
19from autotest_lib.server.cros.network import chaos_clique_utils as utils
20from autotest_lib.server.cros.network import wifi_client
21
# Hostname of the webdriver master.  ChaosRunner.run() wraps it in an SSHHost
# (user 'chaosvmmaster') and hands that host to the VM power on/off helpers.
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
24
25
class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test."""


    def __init__(self, test, host, spec, broken_pdus=None):
        """Initializes the runner and logs the server and DUT times.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None to start with a
                            new empty list.  A caller-supplied list is
                            stored by reference, not copied.

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # Build the default list per instance; a list created in the
        # signature would be one object reused by every runner that is
        # constructed without an explicit list.
        if broken_pdus is None:
            broken_pdus = []
        self._broken_pdus = broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def _wait_for_clear_airspace(self, capture_host):
        """Scans from the packet capturer and checks the visible SSID count.

        Scans 'wlan0' repeatedly until a scan returns a result or
        ap_constants.MAX_SCAN_TIMEOUT seconds have elapsed.

        @param capture_host: host object of the packet capturer.

        @raises TestError: no scan returned a result before the timeout, or
                           a scan found ap_constants.MAX_SSID_COUNT or more
                           networks.

        """
        iw_command = iw_runner.IwRunner(capture_host)
        start_time = time.time()
        logging.info('Performing a scan with a max timeout of 30 seconds.')
        while True:
            networks = iw_command.scan('wlan0')
            if networks is not None:
                break
            # Elapsed time is a float and practically never equals the
            # timeout exactly, so test the deadline with an ordering
            # comparison after every failed scan.
            if time.time() - start_time > ap_constants.MAX_SCAN_TIMEOUT:
                raise error.TestError(
                        'Packet capturer is not responding to scans. Check '
                        'device and re-run test')

        if len(networks) >= ap_constants.MAX_SSID_COUNT:
            raise error.TestError(
                    'Probably someone is already running a chaos test?!')


    def _restart_webdriver_vm(self, webdriver_master, webdriver_instance):
        """Powers the webdriver VM off (best effort) and then on again.

        @param webdriver_master: host object used to control the VMs.
        @param webdriver_instance: the VM instance allocated for this run.

        """
        # If a test is cancelled or aborted the VM may be left on.  Always
        # turn off the VM to return it to a clean state.
        try:
            logging.info('Always power off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
        except Exception:
            # Best effort only.  Catching Exception rather than everything
            # lets KeyboardInterrupt and SystemExit still abort the run.
            logging.debug('VM was already off, ignoring.')

        logging.info('Starting up VM %s', webdriver_instance)
        utils.power_on_VM(webdriver_master, webdriver_instance)


    def run(self, job, batch_size=7, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Allocates a packet capturer and a webdriver VM, then locks APs in
        batches and runs the test once against every usable AP in a batch.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance, the packet
                           capturer not answering scans before the timeout,
                           or too many networks visible in the initial scan.
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            self._wait_for_clear_airspace(capture_host)

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)
            self._ap_spec._webdriver_hostname = webdriver_instance

            self._restart_webdriver_vm(webdriver_master, webdriver_instance)

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around crbug.com/358716
                utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                    hosts.create_host(self._host.hostname),
                    './debug',
                    False)) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(aps,
                            batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                    capture_host, {}, 'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        # Results are tagged per AP; the tag is prefixed with
                        # the worker name when a connection worker is in use.
                        if conn_worker is None:
                            test_tag = ap.ssid
                        else:
                            test_tag = '%s.%s' % (conn_worker.name, ap.ssid)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=test_tag)

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch was already unlocked and reclaimed when
                        # the DUT was found unhealthy, so skip unlock_aps().
                        continue

                batch_locker.unlock_aps()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)
236